cachepc-linux

Fork of AMDESE/linux with modifications for CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux

xfs_trans.c (37079B)


      1// SPDX-License-Identifier: GPL-2.0
      2/*
      3 * Copyright (c) 2000-2003,2005 Silicon Graphics, Inc.
      4 * Copyright (C) 2010 Red Hat, Inc.
      5 * All Rights Reserved.
      6 */
      7#include "xfs.h"
      8#include "xfs_fs.h"
      9#include "xfs_shared.h"
     10#include "xfs_format.h"
     11#include "xfs_log_format.h"
     12#include "xfs_trans_resv.h"
     13#include "xfs_mount.h"
     14#include "xfs_extent_busy.h"
     15#include "xfs_quota.h"
     16#include "xfs_trans.h"
     17#include "xfs_trans_priv.h"
     18#include "xfs_log.h"
     19#include "xfs_log_priv.h"
     20#include "xfs_trace.h"
     21#include "xfs_error.h"
     22#include "xfs_defer.h"
     23#include "xfs_inode.h"
     24#include "xfs_dquot_item.h"
     25#include "xfs_dquot.h"
     26#include "xfs_icache.h"
     27
     28struct kmem_cache	*xfs_trans_cache;
     29
     30#if defined(CONFIG_TRACEPOINTS)
     31static void
     32xfs_trans_trace_reservations(
     33	struct xfs_mount	*mp)
     34{
     35	struct xfs_trans_res	*res;
     36	struct xfs_trans_res	*end_res;
     37	int			i;
     38
     39	res = (struct xfs_trans_res *)M_RES(mp);
     40	end_res = (struct xfs_trans_res *)(M_RES(mp) + 1);
     41	for (i = 0; res < end_res; i++, res++)
     42		trace_xfs_trans_resv_calc(mp, i, res);
     43}
     44#else
     45# define xfs_trans_trace_reservations(mp)
     46#endif
     47
     48/*
     49 * Initialize the precomputed transaction reservation values
     50 * in the mount structure.
     51 */
     52void
     53xfs_trans_init(
     54	struct xfs_mount	*mp)
     55{
     56	xfs_trans_resv_calc(mp, M_RES(mp));
     57	xfs_trans_trace_reservations(mp);
     58}
     59
     60/*
     61 * Free the transaction structure.  If there is more clean up
     62 * to do when the structure is freed, add it here.
     63 */
     64STATIC void
     65xfs_trans_free(
     66	struct xfs_trans	*tp)
     67{
     68	xfs_extent_busy_sort(&tp->t_busy);
     69	xfs_extent_busy_clear(tp->t_mountp, &tp->t_busy, false);
     70
     71	trace_xfs_trans_free(tp, _RET_IP_);
     72	xfs_trans_clear_context(tp);
     73	if (!(tp->t_flags & XFS_TRANS_NO_WRITECOUNT))
     74		sb_end_intwrite(tp->t_mountp->m_super);
     75	xfs_trans_free_dqinfo(tp);
     76	kmem_cache_free(xfs_trans_cache, tp);
     77}
     78
     79/*
     80 * This is called to create a new transaction which will share the
     81 * permanent log reservation of the given transaction.  The remaining
     82 * unused block and rt extent reservations are also inherited.  This
     83 * implies that the original transaction is no longer allowed to allocate
      84 * blocks.  Locks and log items, however, are not inherited.  They must
     85 * be added to the new transaction explicitly.
     86 */
     87STATIC struct xfs_trans *
     88xfs_trans_dup(
     89	struct xfs_trans	*tp)
     90{
     91	struct xfs_trans	*ntp;
     92
     93	trace_xfs_trans_dup(tp, _RET_IP_);
     94
     95	ntp = kmem_cache_zalloc(xfs_trans_cache, GFP_KERNEL | __GFP_NOFAIL);
     96
     97	/*
     98	 * Initialize the new transaction structure.
     99	 */
    100	ntp->t_magic = XFS_TRANS_HEADER_MAGIC;
    101	ntp->t_mountp = tp->t_mountp;
    102	INIT_LIST_HEAD(&ntp->t_items);
    103	INIT_LIST_HEAD(&ntp->t_busy);
    104	INIT_LIST_HEAD(&ntp->t_dfops);
    105	ntp->t_firstblock = NULLFSBLOCK;
    106
    107	ASSERT(tp->t_flags & XFS_TRANS_PERM_LOG_RES);
    108	ASSERT(tp->t_ticket != NULL);
    109
    110	ntp->t_flags = XFS_TRANS_PERM_LOG_RES |
    111		       (tp->t_flags & XFS_TRANS_RESERVE) |
    112		       (tp->t_flags & XFS_TRANS_NO_WRITECOUNT) |
    113		       (tp->t_flags & XFS_TRANS_RES_FDBLKS);
    114	/* We gave our writer reference to the new transaction */
    115	tp->t_flags |= XFS_TRANS_NO_WRITECOUNT;
    116	ntp->t_ticket = xfs_log_ticket_get(tp->t_ticket);
    117
    118	ASSERT(tp->t_blk_res >= tp->t_blk_res_used);
    119	ntp->t_blk_res = tp->t_blk_res - tp->t_blk_res_used;
    120	tp->t_blk_res = tp->t_blk_res_used;
    121
    122	ntp->t_rtx_res = tp->t_rtx_res - tp->t_rtx_res_used;
    123	tp->t_rtx_res = tp->t_rtx_res_used;
    124
    125	xfs_trans_switch_context(tp, ntp);
    126
    127	/* move deferred ops over to the new tp */
    128	xfs_defer_move(ntp, tp);
    129
    130	xfs_trans_dup_dqinfo(tp, ntp);
    131	return ntp;
    132}
    133
    134/*
    135 * This is called to reserve free disk blocks and log space for the
    136 * given transaction.  This must be done before allocating any resources
    137 * within the transaction.
    138 *
    139 * This will return ENOSPC if there are not enough blocks available.
    140 * It will sleep waiting for available log space.
     141 * The only valid flag in resp->tr_logflags is XFS_TRANS_PERM_LOG_RES, which
    142 * is used by long running transactions.  If any one of the reservations
    143 * fails then they will all be backed out.
    144 *
    145 * This does not do quota reservations. That typically is done by the
    146 * caller afterwards.
    147 */
    148static int
    149xfs_trans_reserve(
    150	struct xfs_trans	*tp,
    151	struct xfs_trans_res	*resp,
    152	uint			blocks,
    153	uint			rtextents)
    154{
    155	struct xfs_mount	*mp = tp->t_mountp;
    156	int			error = 0;
    157	bool			rsvd = (tp->t_flags & XFS_TRANS_RESERVE) != 0;
    158
    159	/*
    160	 * Attempt to reserve the needed disk blocks by decrementing
    161	 * the number needed from the number available.  This will
    162	 * fail if the count would go below zero.
    163	 */
    164	if (blocks > 0) {
    165		error = xfs_mod_fdblocks(mp, -((int64_t)blocks), rsvd);
    166		if (error != 0)
    167			return -ENOSPC;
    168		tp->t_blk_res += blocks;
    169	}
    170
    171	/*
    172	 * Reserve the log space needed for this transaction.
    173	 */
    174	if (resp->tr_logres > 0) {
    175		bool	permanent = false;
    176
    177		ASSERT(tp->t_log_res == 0 ||
    178		       tp->t_log_res == resp->tr_logres);
    179		ASSERT(tp->t_log_count == 0 ||
    180		       tp->t_log_count == resp->tr_logcount);
    181
    182		if (resp->tr_logflags & XFS_TRANS_PERM_LOG_RES) {
    183			tp->t_flags |= XFS_TRANS_PERM_LOG_RES;
    184			permanent = true;
    185		} else {
    186			ASSERT(tp->t_ticket == NULL);
    187			ASSERT(!(tp->t_flags & XFS_TRANS_PERM_LOG_RES));
    188		}
    189
    190		if (tp->t_ticket != NULL) {
    191			ASSERT(resp->tr_logflags & XFS_TRANS_PERM_LOG_RES);
    192			error = xfs_log_regrant(mp, tp->t_ticket);
    193		} else {
    194			error = xfs_log_reserve(mp, resp->tr_logres,
    195						resp->tr_logcount,
    196						&tp->t_ticket, permanent);
    197		}
    198
    199		if (error)
    200			goto undo_blocks;
    201
    202		tp->t_log_res = resp->tr_logres;
    203		tp->t_log_count = resp->tr_logcount;
    204	}
    205
    206	/*
    207	 * Attempt to reserve the needed realtime extents by decrementing
    208	 * the number needed from the number available.  This will
    209	 * fail if the count would go below zero.
    210	 */
    211	if (rtextents > 0) {
    212		error = xfs_mod_frextents(mp, -((int64_t)rtextents));
    213		if (error) {
    214			error = -ENOSPC;
    215			goto undo_log;
    216		}
    217		tp->t_rtx_res += rtextents;
    218	}
    219
    220	return 0;
    221
    222	/*
    223	 * Error cases jump to one of these labels to undo any
    224	 * reservations which have already been performed.
    225	 */
    226undo_log:
    227	if (resp->tr_logres > 0) {
    228		xfs_log_ticket_ungrant(mp->m_log, tp->t_ticket);
    229		tp->t_ticket = NULL;
    230		tp->t_log_res = 0;
    231		tp->t_flags &= ~XFS_TRANS_PERM_LOG_RES;
    232	}
    233
    234undo_blocks:
    235	if (blocks > 0) {
    236		xfs_mod_fdblocks(mp, (int64_t)blocks, rsvd);
    237		tp->t_blk_res = 0;
    238	}
    239	return error;
    240}
    241
    242int
    243xfs_trans_alloc(
    244	struct xfs_mount	*mp,
    245	struct xfs_trans_res	*resp,
    246	uint			blocks,
    247	uint			rtextents,
    248	uint			flags,
    249	struct xfs_trans	**tpp)
    250{
    251	struct xfs_trans	*tp;
    252	bool			want_retry = true;
    253	int			error;
    254
    255	/*
    256	 * Allocate the handle before we do our freeze accounting and setting up
    257	 * GFP_NOFS allocation context so that we avoid lockdep false positives
    258	 * by doing GFP_KERNEL allocations inside sb_start_intwrite().
    259	 */
    260retry:
    261	tp = kmem_cache_zalloc(xfs_trans_cache, GFP_KERNEL | __GFP_NOFAIL);
    262	if (!(flags & XFS_TRANS_NO_WRITECOUNT))
    263		sb_start_intwrite(mp->m_super);
    264	xfs_trans_set_context(tp);
    265
    266	/*
    267	 * Zero-reservation ("empty") transactions can't modify anything, so
    268	 * they're allowed to run while we're frozen.
    269	 */
    270	WARN_ON(resp->tr_logres > 0 &&
    271		mp->m_super->s_writers.frozen == SB_FREEZE_COMPLETE);
    272	ASSERT(!(flags & XFS_TRANS_RES_FDBLKS) ||
    273	       xfs_has_lazysbcount(mp));
    274
    275	tp->t_magic = XFS_TRANS_HEADER_MAGIC;
    276	tp->t_flags = flags;
    277	tp->t_mountp = mp;
    278	INIT_LIST_HEAD(&tp->t_items);
    279	INIT_LIST_HEAD(&tp->t_busy);
    280	INIT_LIST_HEAD(&tp->t_dfops);
    281	tp->t_firstblock = NULLFSBLOCK;
    282
    283	error = xfs_trans_reserve(tp, resp, blocks, rtextents);
    284	if (error == -ENOSPC && want_retry) {
    285		xfs_trans_cancel(tp);
    286
    287		/*
    288		 * We weren't able to reserve enough space for the transaction.
    289		 * Flush the other speculative space allocations to free space.
    290		 * Do not perform a synchronous scan because callers can hold
    291		 * other locks.
    292		 */
    293		xfs_blockgc_flush_all(mp);
    294		want_retry = false;
    295		goto retry;
    296	}
    297	if (error) {
    298		xfs_trans_cancel(tp);
    299		return error;
    300	}
    301
    302	trace_xfs_trans_alloc(tp, _RET_IP_);
    303
    304	*tpp = tp;
    305	return 0;
    306}
    307
    308/*
    309 * Create an empty transaction with no reservation.  This is a defensive
    310 * mechanism for routines that query metadata without actually modifying them --
    311 * if the metadata being queried is somehow cross-linked (think a btree block
    312 * pointer that points higher in the tree), we risk deadlock.  However, blocks
    313 * grabbed as part of a transaction can be re-grabbed.  The verifiers will
    314 * notice the corrupt block and the operation will fail back to userspace
    315 * without deadlocking.
    316 *
    317 * Note the zero-length reservation; this transaction MUST be cancelled without
    318 * any dirty data.
    319 *
    320 * Callers should obtain freeze protection to avoid a conflict with fs freezing
    321 * where we can be grabbing buffers at the same time that freeze is trying to
    322 * drain the buffer LRU list.
    323 */
    324int
    325xfs_trans_alloc_empty(
    326	struct xfs_mount		*mp,
    327	struct xfs_trans		**tpp)
    328{
    329	struct xfs_trans_res		resv = {0};
    330
    331	return xfs_trans_alloc(mp, &resv, 0, 0, XFS_TRANS_NO_WRITECOUNT, tpp);
    332}
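
[Editor's aside, not part of the upstream file: a minimal caller-side sketch of the empty-transaction pattern described above. It assumes a struct xfs_mount *mp is in scope and that only read-only metadata lookups happen before the mandatory cancel.]

	struct xfs_trans	*tp;
	int			error;

	error = xfs_trans_alloc_empty(mp, &tp);
	if (error)
		return error;

	/* read-only metadata queries only; nothing may be dirtied */

	xfs_trans_cancel(tp);
	return 0;
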
    333
    334/*
    335 * Record the indicated change to the given field for application
    336 * to the file system's superblock when the transaction commits.
    337 * For now, just store the change in the transaction structure.
    338 *
    339 * Mark the transaction structure to indicate that the superblock
    340 * needs to be updated before committing.
    341 *
    342 * Because we may not be keeping track of allocated/free inodes and
    343 * used filesystem blocks in the superblock, we do not mark the
    344 * superblock dirty in this transaction if we modify these fields.
    345 * We still need to update the transaction deltas so that they get
    346 * applied to the incore superblock, but we don't want them to
    347 * cause the superblock to get locked and logged if these are the
    348 * only fields in the superblock that the transaction modifies.
    349 */
    350void
    351xfs_trans_mod_sb(
    352	xfs_trans_t	*tp,
    353	uint		field,
    354	int64_t		delta)
    355{
    356	uint32_t	flags = (XFS_TRANS_DIRTY|XFS_TRANS_SB_DIRTY);
    357	xfs_mount_t	*mp = tp->t_mountp;
    358
    359	switch (field) {
    360	case XFS_TRANS_SB_ICOUNT:
    361		tp->t_icount_delta += delta;
    362		if (xfs_has_lazysbcount(mp))
    363			flags &= ~XFS_TRANS_SB_DIRTY;
    364		break;
    365	case XFS_TRANS_SB_IFREE:
    366		tp->t_ifree_delta += delta;
    367		if (xfs_has_lazysbcount(mp))
    368			flags &= ~XFS_TRANS_SB_DIRTY;
    369		break;
    370	case XFS_TRANS_SB_FDBLOCKS:
    371		/*
    372		 * Track the number of blocks allocated in the transaction.
    373		 * Make sure it does not exceed the number reserved. If so,
    374		 * shutdown as this can lead to accounting inconsistency.
    375		 */
    376		if (delta < 0) {
    377			tp->t_blk_res_used += (uint)-delta;
    378			if (tp->t_blk_res_used > tp->t_blk_res)
    379				xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
    380		} else if (delta > 0 && (tp->t_flags & XFS_TRANS_RES_FDBLKS)) {
    381			int64_t	blkres_delta;
    382
    383			/*
    384			 * Return freed blocks directly to the reservation
    385			 * instead of the global pool, being careful not to
    386			 * overflow the trans counter. This is used to preserve
    387			 * reservation across chains of transaction rolls that
    388			 * repeatedly free and allocate blocks.
    389			 */
    390			blkres_delta = min_t(int64_t, delta,
    391					     UINT_MAX - tp->t_blk_res);
    392			tp->t_blk_res += blkres_delta;
    393			delta -= blkres_delta;
    394		}
    395		tp->t_fdblocks_delta += delta;
    396		if (xfs_has_lazysbcount(mp))
    397			flags &= ~XFS_TRANS_SB_DIRTY;
    398		break;
    399	case XFS_TRANS_SB_RES_FDBLOCKS:
    400		/*
    401		 * The allocation has already been applied to the
    402		 * in-core superblock's counter.  This should only
    403		 * be applied to the on-disk superblock.
    404		 */
    405		tp->t_res_fdblocks_delta += delta;
    406		if (xfs_has_lazysbcount(mp))
    407			flags &= ~XFS_TRANS_SB_DIRTY;
    408		break;
    409	case XFS_TRANS_SB_FREXTENTS:
    410		/*
    411		 * Track the number of blocks allocated in the
    412		 * transaction.  Make sure it does not exceed the
    413		 * number reserved.
    414		 */
    415		if (delta < 0) {
    416			tp->t_rtx_res_used += (uint)-delta;
    417			ASSERT(tp->t_rtx_res_used <= tp->t_rtx_res);
    418		}
    419		tp->t_frextents_delta += delta;
    420		break;
    421	case XFS_TRANS_SB_RES_FREXTENTS:
    422		/*
    423		 * The allocation has already been applied to the
    424		 * in-core superblock's counter.  This should only
    425		 * be applied to the on-disk superblock.
    426		 */
    427		ASSERT(delta < 0);
    428		tp->t_res_frextents_delta += delta;
    429		break;
    430	case XFS_TRANS_SB_DBLOCKS:
    431		tp->t_dblocks_delta += delta;
    432		break;
    433	case XFS_TRANS_SB_AGCOUNT:
    434		ASSERT(delta > 0);
    435		tp->t_agcount_delta += delta;
    436		break;
    437	case XFS_TRANS_SB_IMAXPCT:
    438		tp->t_imaxpct_delta += delta;
    439		break;
    440	case XFS_TRANS_SB_REXTSIZE:
    441		tp->t_rextsize_delta += delta;
    442		break;
    443	case XFS_TRANS_SB_RBMBLOCKS:
    444		tp->t_rbmblocks_delta += delta;
    445		break;
    446	case XFS_TRANS_SB_RBLOCKS:
    447		tp->t_rblocks_delta += delta;
    448		break;
    449	case XFS_TRANS_SB_REXTENTS:
    450		tp->t_rextents_delta += delta;
    451		break;
    452	case XFS_TRANS_SB_REXTSLOG:
    453		tp->t_rextslog_delta += delta;
    454		break;
    455	default:
    456		ASSERT(0);
    457		return;
    458	}
    459
    460	tp->t_flags |= flags;
    461}
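
[Editor's aside, not part of the upstream file: a hedged example of the calling convention described above. An allocation path charges newly allocated blocks with a negative delta; 'len' is a placeholder block count.]

	/* charge 'len' newly allocated data blocks to this transaction */
	xfs_trans_mod_sb(tp, XFS_TRANS_SB_FDBLOCKS, -((int64_t)len));

	/* return a freed realtime extent to the superblock counter */
	xfs_trans_mod_sb(tp, XFS_TRANS_SB_FREXTENTS, 1);
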
    462
    463/*
    464 * xfs_trans_apply_sb_deltas() is called from the commit code
    465 * to bring the superblock buffer into the current transaction
    466 * and modify it as requested by earlier calls to xfs_trans_mod_sb().
    467 *
    468 * For now we just look at each field allowed to change and change
    469 * it if necessary.
    470 */
    471STATIC void
    472xfs_trans_apply_sb_deltas(
    473	xfs_trans_t	*tp)
    474{
    475	struct xfs_dsb	*sbp;
    476	struct xfs_buf	*bp;
    477	int		whole = 0;
    478
    479	bp = xfs_trans_getsb(tp);
    480	sbp = bp->b_addr;
    481
    482	/*
    483	 * Only update the superblock counters if we are logging them
    484	 */
    485	if (!xfs_has_lazysbcount((tp->t_mountp))) {
    486		if (tp->t_icount_delta)
    487			be64_add_cpu(&sbp->sb_icount, tp->t_icount_delta);
    488		if (tp->t_ifree_delta)
    489			be64_add_cpu(&sbp->sb_ifree, tp->t_ifree_delta);
    490		if (tp->t_fdblocks_delta)
    491			be64_add_cpu(&sbp->sb_fdblocks, tp->t_fdblocks_delta);
    492		if (tp->t_res_fdblocks_delta)
    493			be64_add_cpu(&sbp->sb_fdblocks, tp->t_res_fdblocks_delta);
    494	}
    495
    496	/*
    497	 * Updating frextents requires careful handling because it does not
    498	 * behave like the lazysb counters because we cannot rely on log
     499 * recovery in older kernels to recompute the value from the rtbitmap.
    500	 * This means that the ondisk frextents must be consistent with the
    501	 * rtbitmap.
    502	 *
    503	 * Therefore, log the frextents change to the ondisk superblock and
    504	 * update the incore superblock so that future calls to xfs_log_sb
    505	 * write the correct value ondisk.
    506	 *
    507	 * Don't touch m_frextents because it includes incore reservations,
    508	 * and those are handled by the unreserve function.
    509	 */
    510	if (tp->t_frextents_delta || tp->t_res_frextents_delta) {
    511		struct xfs_mount	*mp = tp->t_mountp;
    512		int64_t			rtxdelta;
    513
    514		rtxdelta = tp->t_frextents_delta + tp->t_res_frextents_delta;
    515
    516		spin_lock(&mp->m_sb_lock);
    517		be64_add_cpu(&sbp->sb_frextents, rtxdelta);
    518		mp->m_sb.sb_frextents += rtxdelta;
    519		spin_unlock(&mp->m_sb_lock);
    520	}
    521
    522	if (tp->t_dblocks_delta) {
    523		be64_add_cpu(&sbp->sb_dblocks, tp->t_dblocks_delta);
    524		whole = 1;
    525	}
    526	if (tp->t_agcount_delta) {
    527		be32_add_cpu(&sbp->sb_agcount, tp->t_agcount_delta);
    528		whole = 1;
    529	}
    530	if (tp->t_imaxpct_delta) {
    531		sbp->sb_imax_pct += tp->t_imaxpct_delta;
    532		whole = 1;
    533	}
    534	if (tp->t_rextsize_delta) {
    535		be32_add_cpu(&sbp->sb_rextsize, tp->t_rextsize_delta);
    536		whole = 1;
    537	}
    538	if (tp->t_rbmblocks_delta) {
    539		be32_add_cpu(&sbp->sb_rbmblocks, tp->t_rbmblocks_delta);
    540		whole = 1;
    541	}
    542	if (tp->t_rblocks_delta) {
    543		be64_add_cpu(&sbp->sb_rblocks, tp->t_rblocks_delta);
    544		whole = 1;
    545	}
    546	if (tp->t_rextents_delta) {
    547		be64_add_cpu(&sbp->sb_rextents, tp->t_rextents_delta);
    548		whole = 1;
    549	}
    550	if (tp->t_rextslog_delta) {
    551		sbp->sb_rextslog += tp->t_rextslog_delta;
    552		whole = 1;
    553	}
    554
    555	xfs_trans_buf_set_type(tp, bp, XFS_BLFT_SB_BUF);
    556	if (whole)
    557		/*
    558		 * Log the whole thing, the fields are noncontiguous.
    559		 */
    560		xfs_trans_log_buf(tp, bp, 0, sizeof(struct xfs_dsb) - 1);
    561	else
    562		/*
    563		 * Since all the modifiable fields are contiguous, we
    564		 * can get away with this.
    565		 */
    566		xfs_trans_log_buf(tp, bp, offsetof(struct xfs_dsb, sb_icount),
    567				  offsetof(struct xfs_dsb, sb_frextents) +
    568				  sizeof(sbp->sb_frextents) - 1);
    569}
    570
    571/*
    572 * xfs_trans_unreserve_and_mod_sb() is called to release unused reservations and
    573 * apply superblock counter changes to the in-core superblock.  The
    574 * t_res_fdblocks_delta and t_res_frextents_delta fields are explicitly NOT
    575 * applied to the in-core superblock.  The idea is that that has already been
    576 * done.
    577 *
    578 * If we are not logging superblock counters, then the inode allocated/free and
    579 * used block counts are not updated in the on disk superblock. In this case,
    580 * XFS_TRANS_SB_DIRTY will not be set when the transaction is updated but we
    581 * still need to update the incore superblock with the changes.
    582 *
    583 * Deltas for the inode count are +/-64, hence we use a large batch size of 128
    584 * so we don't need to take the counter lock on every update.
    585 */
    586#define XFS_ICOUNT_BATCH	128
    587
    588void
    589xfs_trans_unreserve_and_mod_sb(
    590	struct xfs_trans	*tp)
    591{
    592	struct xfs_mount	*mp = tp->t_mountp;
    593	bool			rsvd = (tp->t_flags & XFS_TRANS_RESERVE) != 0;
    594	int64_t			blkdelta = 0;
    595	int64_t			rtxdelta = 0;
    596	int64_t			idelta = 0;
    597	int64_t			ifreedelta = 0;
    598	int			error;
    599
    600	/* calculate deltas */
    601	if (tp->t_blk_res > 0)
    602		blkdelta = tp->t_blk_res;
    603	if ((tp->t_fdblocks_delta != 0) &&
    604	    (xfs_has_lazysbcount(mp) ||
    605	     (tp->t_flags & XFS_TRANS_SB_DIRTY)))
    606	        blkdelta += tp->t_fdblocks_delta;
    607
    608	if (tp->t_rtx_res > 0)
    609		rtxdelta = tp->t_rtx_res;
    610	if ((tp->t_frextents_delta != 0) &&
    611	    (tp->t_flags & XFS_TRANS_SB_DIRTY))
    612		rtxdelta += tp->t_frextents_delta;
    613
    614	if (xfs_has_lazysbcount(mp) ||
    615	     (tp->t_flags & XFS_TRANS_SB_DIRTY)) {
    616		idelta = tp->t_icount_delta;
    617		ifreedelta = tp->t_ifree_delta;
    618	}
    619
    620	/* apply the per-cpu counters */
    621	if (blkdelta) {
    622		error = xfs_mod_fdblocks(mp, blkdelta, rsvd);
    623		ASSERT(!error);
    624	}
    625
    626	if (idelta)
    627		percpu_counter_add_batch(&mp->m_icount, idelta,
    628					 XFS_ICOUNT_BATCH);
    629
    630	if (ifreedelta)
    631		percpu_counter_add(&mp->m_ifree, ifreedelta);
    632
    633	if (rtxdelta) {
    634		error = xfs_mod_frextents(mp, rtxdelta);
    635		ASSERT(!error);
    636	}
    637
    638	if (!(tp->t_flags & XFS_TRANS_SB_DIRTY))
    639		return;
    640
    641	/* apply remaining deltas */
    642	spin_lock(&mp->m_sb_lock);
    643	mp->m_sb.sb_fdblocks += tp->t_fdblocks_delta + tp->t_res_fdblocks_delta;
    644	mp->m_sb.sb_icount += idelta;
    645	mp->m_sb.sb_ifree += ifreedelta;
    646	/*
    647	 * Do not touch sb_frextents here because we are dealing with incore
    648	 * reservation.  sb_frextents is not part of the lazy sb counters so it
    649	 * must be consistent with the ondisk rtbitmap and must never include
    650	 * incore reservations.
    651	 */
    652	mp->m_sb.sb_dblocks += tp->t_dblocks_delta;
    653	mp->m_sb.sb_agcount += tp->t_agcount_delta;
    654	mp->m_sb.sb_imax_pct += tp->t_imaxpct_delta;
    655	mp->m_sb.sb_rextsize += tp->t_rextsize_delta;
    656	mp->m_sb.sb_rbmblocks += tp->t_rbmblocks_delta;
    657	mp->m_sb.sb_rblocks += tp->t_rblocks_delta;
    658	mp->m_sb.sb_rextents += tp->t_rextents_delta;
    659	mp->m_sb.sb_rextslog += tp->t_rextslog_delta;
    660	spin_unlock(&mp->m_sb_lock);
    661
    662	/*
    663	 * Debug checks outside of the spinlock so they don't lock up the
    664	 * machine if they fail.
    665	 */
    666	ASSERT(mp->m_sb.sb_imax_pct >= 0);
    667	ASSERT(mp->m_sb.sb_rextslog >= 0);
    668	return;
    669}
    670
    671/* Add the given log item to the transaction's list of log items. */
    672void
    673xfs_trans_add_item(
    674	struct xfs_trans	*tp,
    675	struct xfs_log_item	*lip)
    676{
    677	ASSERT(lip->li_log == tp->t_mountp->m_log);
    678	ASSERT(lip->li_ailp == tp->t_mountp->m_ail);
    679	ASSERT(list_empty(&lip->li_trans));
    680	ASSERT(!test_bit(XFS_LI_DIRTY, &lip->li_flags));
    681
    682	list_add_tail(&lip->li_trans, &tp->t_items);
    683	trace_xfs_trans_add_item(tp, _RET_IP_);
    684}
    685
    686/*
     687 * Unlink the log item from the transaction.  The log item is no longer
    688 * considered dirty in this transaction, as the linked transaction has
    689 * finished, either by abort or commit completion.
    690 */
    691void
    692xfs_trans_del_item(
    693	struct xfs_log_item	*lip)
    694{
    695	clear_bit(XFS_LI_DIRTY, &lip->li_flags);
    696	list_del_init(&lip->li_trans);
    697}
    698
    699/* Detach and unlock all of the items in a transaction */
    700static void
    701xfs_trans_free_items(
    702	struct xfs_trans	*tp,
    703	bool			abort)
    704{
    705	struct xfs_log_item	*lip, *next;
    706
    707	trace_xfs_trans_free_items(tp, _RET_IP_);
    708
    709	list_for_each_entry_safe(lip, next, &tp->t_items, li_trans) {
    710		xfs_trans_del_item(lip);
    711		if (abort)
    712			set_bit(XFS_LI_ABORTED, &lip->li_flags);
    713		if (lip->li_ops->iop_release)
    714			lip->li_ops->iop_release(lip);
    715	}
    716}
    717
    718static inline void
    719xfs_log_item_batch_insert(
    720	struct xfs_ail		*ailp,
    721	struct xfs_ail_cursor	*cur,
    722	struct xfs_log_item	**log_items,
    723	int			nr_items,
    724	xfs_lsn_t		commit_lsn)
    725{
    726	int	i;
    727
    728	spin_lock(&ailp->ail_lock);
    729	/* xfs_trans_ail_update_bulk drops ailp->ail_lock */
    730	xfs_trans_ail_update_bulk(ailp, cur, log_items, nr_items, commit_lsn);
    731
    732	for (i = 0; i < nr_items; i++) {
    733		struct xfs_log_item *lip = log_items[i];
    734
    735		if (lip->li_ops->iop_unpin)
    736			lip->li_ops->iop_unpin(lip, 0);
    737	}
    738}
    739
    740/*
    741 * Bulk operation version of xfs_trans_committed that takes a log vector of
    742 * items to insert into the AIL. This uses bulk AIL insertion techniques to
    743 * minimise lock traffic.
    744 *
    745 * If we are called with the aborted flag set, it is because a log write during
    746 * a CIL checkpoint commit has failed. In this case, all the items in the
    747 * checkpoint have already gone through iop_committed and iop_committing, which
    748 * means that checkpoint commit abort handling is treated exactly the same
    749 * as an iclog write error even though we haven't started any IO yet. Hence in
    750 * this case all we need to do is iop_committed processing, followed by an
    751 * iop_unpin(aborted) call.
    752 *
    753 * The AIL cursor is used to optimise the insert process. If commit_lsn is not
    754 * at the end of the AIL, the insert cursor avoids the need to walk
    755 * the AIL to find the insertion point on every xfs_log_item_batch_insert()
    756 * call. This saves a lot of needless list walking and is a net win, even
     757 * though it slightly increases the amount of AIL lock traffic to set it up
    758 * and tear it down.
    759 */
    760void
    761xfs_trans_committed_bulk(
    762	struct xfs_ail		*ailp,
    763	struct xfs_log_vec	*log_vector,
    764	xfs_lsn_t		commit_lsn,
    765	bool			aborted)
    766{
    767#define LOG_ITEM_BATCH_SIZE	32
    768	struct xfs_log_item	*log_items[LOG_ITEM_BATCH_SIZE];
    769	struct xfs_log_vec	*lv;
    770	struct xfs_ail_cursor	cur;
    771	int			i = 0;
    772
    773	spin_lock(&ailp->ail_lock);
    774	xfs_trans_ail_cursor_last(ailp, &cur, commit_lsn);
    775	spin_unlock(&ailp->ail_lock);
    776
    777	/* unpin all the log items */
    778	for (lv = log_vector; lv; lv = lv->lv_next ) {
    779		struct xfs_log_item	*lip = lv->lv_item;
    780		xfs_lsn_t		item_lsn;
    781
    782		if (aborted)
    783			set_bit(XFS_LI_ABORTED, &lip->li_flags);
    784
    785		if (lip->li_ops->flags & XFS_ITEM_RELEASE_WHEN_COMMITTED) {
    786			lip->li_ops->iop_release(lip);
    787			continue;
    788		}
    789
    790		if (lip->li_ops->iop_committed)
    791			item_lsn = lip->li_ops->iop_committed(lip, commit_lsn);
    792		else
    793			item_lsn = commit_lsn;
    794
    795		/* item_lsn of -1 means the item needs no further processing */
    796		if (XFS_LSN_CMP(item_lsn, (xfs_lsn_t)-1) == 0)
    797			continue;
    798
    799		/*
    800		 * if we are aborting the operation, no point in inserting the
    801		 * object into the AIL as we are in a shutdown situation.
    802		 */
    803		if (aborted) {
    804			ASSERT(xlog_is_shutdown(ailp->ail_log));
    805			if (lip->li_ops->iop_unpin)
    806				lip->li_ops->iop_unpin(lip, 1);
    807			continue;
    808		}
    809
    810		if (item_lsn != commit_lsn) {
    811
    812			/*
    813			 * Not a bulk update option due to unusual item_lsn.
    814			 * Push into AIL immediately, rechecking the lsn once
    815			 * we have the ail lock. Then unpin the item. This does
    816			 * not affect the AIL cursor the bulk insert path is
    817			 * using.
    818			 */
    819			spin_lock(&ailp->ail_lock);
    820			if (XFS_LSN_CMP(item_lsn, lip->li_lsn) > 0)
    821				xfs_trans_ail_update(ailp, lip, item_lsn);
    822			else
    823				spin_unlock(&ailp->ail_lock);
    824			if (lip->li_ops->iop_unpin)
    825				lip->li_ops->iop_unpin(lip, 0);
    826			continue;
    827		}
    828
    829		/* Item is a candidate for bulk AIL insert.  */
    830		log_items[i++] = lv->lv_item;
    831		if (i >= LOG_ITEM_BATCH_SIZE) {
    832			xfs_log_item_batch_insert(ailp, &cur, log_items,
    833					LOG_ITEM_BATCH_SIZE, commit_lsn);
    834			i = 0;
    835		}
    836	}
    837
    838	/* make sure we insert the remainder! */
    839	if (i)
    840		xfs_log_item_batch_insert(ailp, &cur, log_items, i, commit_lsn);
    841
    842	spin_lock(&ailp->ail_lock);
    843	xfs_trans_ail_cursor_done(&cur);
    844	spin_unlock(&ailp->ail_lock);
    845}
    846
    847/*
    848 * Commit the given transaction to the log.
    849 *
     850 * The XFS disk error handling mechanism is not based on a typical
     851 * transaction abort mechanism. Logically, after the filesystem
     852 * gets marked 'SHUTDOWN', we can't let any new transactions
     853 * be durable - i.e. committed to disk - because some metadata might
    854 * be inconsistent. In such cases, this returns an error, and the
    855 * caller may assume that all locked objects joined to the transaction
    856 * have already been unlocked as if the commit had succeeded.
    857 * Do not reference the transaction structure after this call.
    858 */
    859static int
    860__xfs_trans_commit(
    861	struct xfs_trans	*tp,
    862	bool			regrant)
    863{
    864	struct xfs_mount	*mp = tp->t_mountp;
    865	struct xlog		*log = mp->m_log;
    866	xfs_csn_t		commit_seq = 0;
    867	int			error = 0;
    868	int			sync = tp->t_flags & XFS_TRANS_SYNC;
    869
    870	trace_xfs_trans_commit(tp, _RET_IP_);
    871
    872	/*
    873	 * Finish deferred items on final commit. Only permanent transactions
    874	 * should ever have deferred ops.
    875	 */
    876	WARN_ON_ONCE(!list_empty(&tp->t_dfops) &&
    877		     !(tp->t_flags & XFS_TRANS_PERM_LOG_RES));
    878	if (!regrant && (tp->t_flags & XFS_TRANS_PERM_LOG_RES)) {
    879		error = xfs_defer_finish_noroll(&tp);
    880		if (error)
    881			goto out_unreserve;
    882	}
    883
    884	/*
    885	 * If there is nothing to be logged by the transaction,
    886	 * then unlock all of the items associated with the
    887	 * transaction and free the transaction structure.
    888	 * Also make sure to return any reserved blocks to
    889	 * the free pool.
    890	 */
    891	if (!(tp->t_flags & XFS_TRANS_DIRTY))
    892		goto out_unreserve;
    893
    894	/*
    895	 * We must check against log shutdown here because we cannot abort log
    896	 * items and leave them dirty, inconsistent and unpinned in memory while
    897	 * the log is active. This leaves them open to being written back to
    898	 * disk, and that will lead to on-disk corruption.
    899	 */
    900	if (xlog_is_shutdown(log)) {
    901		error = -EIO;
    902		goto out_unreserve;
    903	}
    904
    905	ASSERT(tp->t_ticket != NULL);
    906
    907	/*
    908	 * If we need to update the superblock, then do it now.
    909	 */
    910	if (tp->t_flags & XFS_TRANS_SB_DIRTY)
    911		xfs_trans_apply_sb_deltas(tp);
    912	xfs_trans_apply_dquot_deltas(tp);
    913
    914	xlog_cil_commit(log, tp, &commit_seq, regrant);
    915
    916	xfs_trans_free(tp);
    917
    918	/*
    919	 * If the transaction needs to be synchronous, then force the
    920	 * log out now and wait for it.
    921	 */
    922	if (sync) {
    923		error = xfs_log_force_seq(mp, commit_seq, XFS_LOG_SYNC, NULL);
    924		XFS_STATS_INC(mp, xs_trans_sync);
    925	} else {
    926		XFS_STATS_INC(mp, xs_trans_async);
    927	}
    928
    929	return error;
    930
    931out_unreserve:
    932	xfs_trans_unreserve_and_mod_sb(tp);
    933
    934	/*
    935	 * It is indeed possible for the transaction to be not dirty but
    936	 * the dqinfo portion to be.  All that means is that we have some
    937	 * (non-persistent) quota reservations that need to be unreserved.
    938	 */
    939	xfs_trans_unreserve_and_mod_dquots(tp);
    940	if (tp->t_ticket) {
    941		if (regrant && !xlog_is_shutdown(log))
    942			xfs_log_ticket_regrant(log, tp->t_ticket);
    943		else
    944			xfs_log_ticket_ungrant(log, tp->t_ticket);
    945		tp->t_ticket = NULL;
    946	}
    947	xfs_trans_free_items(tp, !!error);
    948	xfs_trans_free(tp);
    949
    950	XFS_STATS_INC(mp, xs_trans_empty);
    951	return error;
    952}
    953
    954int
    955xfs_trans_commit(
    956	struct xfs_trans	*tp)
    957{
    958	return __xfs_trans_commit(tp, false);
    959}
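
[Editor's aside, not part of the upstream file: a sketch of the allocate/join/modify/commit cycle that the comments above assume. The tr_ichange reservation and the inode ip are stand-ins; real callers pick the reservation that matches their update.]

	struct xfs_trans	*tp;
	int			error;

	error = xfs_trans_alloc(mp, &M_RES(mp)->tr_ichange, 0, 0, 0, &tp);
	if (error)
		return error;

	xfs_ilock(ip, XFS_ILOCK_EXCL);
	xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);	/* trans now owns the ILOCK */

	/* ...modify the incore inode here... */
	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);

	/* commit (or cancel) unlocks the joined inode and frees tp */
	return xfs_trans_commit(tp);
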
    960
    961/*
    962 * Unlock all of the transaction's items and free the transaction.  If the
    963 * transaction is dirty, we must shut down the filesystem because there is no
    964 * way to restore them to their previous state.
    965 *
    966 * If the transaction has made a log reservation, make sure to release it as
    967 * well.
    968 *
    969 * This is a high level function (equivalent to xfs_trans_commit()) and so can
    970 * be called after the transaction has effectively been aborted due to the mount
    971 * being shut down. However, if the mount has not been shut down and the
    972 * transaction is dirty we will shut the mount down and, in doing so, that
    973 * guarantees that the log is shut down, too. Hence we don't need to be as
    974 * careful with shutdown state and dirty items here as we need to be in
    975 * xfs_trans_commit().
    976 */
    977void
    978xfs_trans_cancel(
    979	struct xfs_trans	*tp)
    980{
    981	struct xfs_mount	*mp = tp->t_mountp;
    982	struct xlog		*log = mp->m_log;
    983	bool			dirty = (tp->t_flags & XFS_TRANS_DIRTY);
    984
    985	trace_xfs_trans_cancel(tp, _RET_IP_);
    986
    987	/*
    988	 * It's never valid to cancel a transaction with deferred ops attached,
    989	 * because the transaction is effectively dirty.  Complain about this
    990	 * loudly before freeing the in-memory defer items.
    991	 */
    992	if (!list_empty(&tp->t_dfops)) {
    993		ASSERT(xfs_is_shutdown(mp) || list_empty(&tp->t_dfops));
    994		ASSERT(tp->t_flags & XFS_TRANS_PERM_LOG_RES);
    995		dirty = true;
    996		xfs_defer_cancel(tp);
    997	}
    998
    999	/*
   1000	 * See if the caller is relying on us to shut down the filesystem. We
   1001	 * only want an error report if there isn't already a shutdown in
   1002	 * progress, so we only need to check against the mount shutdown state
   1003	 * here.
   1004	 */
   1005	if (dirty && !xfs_is_shutdown(mp)) {
   1006		XFS_ERROR_REPORT("xfs_trans_cancel", XFS_ERRLEVEL_LOW, mp);
   1007		xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
   1008	}
   1009#ifdef DEBUG
   1010	/* Log items need to be consistent until the log is shut down. */
   1011	if (!dirty && !xlog_is_shutdown(log)) {
   1012		struct xfs_log_item *lip;
   1013
   1014		list_for_each_entry(lip, &tp->t_items, li_trans)
   1015			ASSERT(!xlog_item_is_intent_done(lip));
   1016	}
   1017#endif
   1018	xfs_trans_unreserve_and_mod_sb(tp);
   1019	xfs_trans_unreserve_and_mod_dquots(tp);
   1020
   1021	if (tp->t_ticket) {
   1022		xfs_log_ticket_ungrant(log, tp->t_ticket);
   1023		tp->t_ticket = NULL;
   1024	}
   1025
   1026	xfs_trans_free_items(tp, dirty);
   1027	xfs_trans_free(tp);
   1028}
   1029
   1030/*
   1031 * Roll from one trans in the sequence of PERMANENT transactions to
   1032 * the next: permanent transactions are only flushed out when
    1033 * committed with xfs_trans_commit(), but we still want to let chunks
    1034 * of it go to the log as soon as possible.  So we commit the
   1035 * chunk we've been working on and get a new transaction to continue.
   1036 */
   1037int
   1038xfs_trans_roll(
   1039	struct xfs_trans	**tpp)
   1040{
   1041	struct xfs_trans	*trans = *tpp;
   1042	struct xfs_trans_res	tres;
   1043	int			error;
   1044
   1045	trace_xfs_trans_roll(trans, _RET_IP_);
   1046
   1047	/*
   1048	 * Copy the critical parameters from one trans to the next.
   1049	 */
   1050	tres.tr_logres = trans->t_log_res;
   1051	tres.tr_logcount = trans->t_log_count;
   1052
   1053	*tpp = xfs_trans_dup(trans);
   1054
   1055	/*
   1056	 * Commit the current transaction.
   1057	 * If this commit failed, then it'd just unlock those items that
   1058	 * are not marked ihold. That also means that a filesystem shutdown
   1059	 * is in progress. The caller takes the responsibility to cancel
   1060	 * the duplicate transaction that gets returned.
   1061	 */
   1062	error = __xfs_trans_commit(trans, true);
   1063	if (error)
   1064		return error;
   1065
   1066	/*
   1067	 * Reserve space in the log for the next transaction.
   1068	 * This also pushes items in the "AIL", the list of logged items,
   1069	 * out to disk if they are taking up space at the tail of the log
   1070	 * that we want to use.  This requires that either nothing be locked
   1071	 * across this call, or that anything that is locked be logged in
   1072	 * the prior and the next transactions.
   1073	 */
   1074	tres.tr_logflags = XFS_TRANS_PERM_LOG_RES;
   1075	return xfs_trans_reserve(*tpp, &tres, 0, 0);
   1076}
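
[Editor's aside, not part of the upstream file: a hedged sketch of the rolling pattern described above, loosely modelled on xfs_trans_roll_inode(). Because log items are not inherited across the roll, the inode has to be logged before and re-joined after each roll.]

	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);

	error = xfs_trans_roll(&tp);
	if (error)
		return error;

	/* the rolled transaction starts with no log items attached */
	xfs_trans_ijoin(tp, ip, 0);
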
   1077
   1078/*
    1079 * Allocate a transaction, lock and join the inode to it, and reserve quota.
   1080 *
   1081 * The caller must ensure that the on-disk dquots attached to this inode have
   1082 * already been allocated and initialized.  The caller is responsible for
   1083 * releasing ILOCK_EXCL if a new transaction is returned.
   1084 */
   1085int
   1086xfs_trans_alloc_inode(
   1087	struct xfs_inode	*ip,
   1088	struct xfs_trans_res	*resv,
   1089	unsigned int		dblocks,
   1090	unsigned int		rblocks,
   1091	bool			force,
   1092	struct xfs_trans	**tpp)
   1093{
   1094	struct xfs_trans	*tp;
   1095	struct xfs_mount	*mp = ip->i_mount;
   1096	bool			retried = false;
   1097	int			error;
   1098
   1099retry:
   1100	error = xfs_trans_alloc(mp, resv, dblocks,
   1101			rblocks / mp->m_sb.sb_rextsize,
   1102			force ? XFS_TRANS_RESERVE : 0, &tp);
   1103	if (error)
   1104		return error;
   1105
   1106	xfs_ilock(ip, XFS_ILOCK_EXCL);
   1107	xfs_trans_ijoin(tp, ip, 0);
   1108
   1109	error = xfs_qm_dqattach_locked(ip, false);
   1110	if (error) {
   1111		/* Caller should have allocated the dquots! */
   1112		ASSERT(error != -ENOENT);
   1113		goto out_cancel;
   1114	}
   1115
   1116	error = xfs_trans_reserve_quota_nblks(tp, ip, dblocks, rblocks, force);
   1117	if ((error == -EDQUOT || error == -ENOSPC) && !retried) {
   1118		xfs_trans_cancel(tp);
   1119		xfs_iunlock(ip, XFS_ILOCK_EXCL);
   1120		xfs_blockgc_free_quota(ip, 0);
   1121		retried = true;
   1122		goto retry;
   1123	}
   1124	if (error)
   1125		goto out_cancel;
   1126
   1127	*tpp = tp;
   1128	return 0;
   1129
   1130out_cancel:
   1131	xfs_trans_cancel(tp);
   1132	xfs_iunlock(ip, XFS_ILOCK_EXCL);
   1133	return error;
   1134}
   1135
   1136/*
    1137 * Allocate a transaction in preparation for inode creation by reserving quota
   1138 * against the given dquots.  Callers are not required to hold any inode locks.
   1139 */
   1140int
   1141xfs_trans_alloc_icreate(
   1142	struct xfs_mount	*mp,
   1143	struct xfs_trans_res	*resv,
   1144	struct xfs_dquot	*udqp,
   1145	struct xfs_dquot	*gdqp,
   1146	struct xfs_dquot	*pdqp,
   1147	unsigned int		dblocks,
   1148	struct xfs_trans	**tpp)
   1149{
   1150	struct xfs_trans	*tp;
   1151	bool			retried = false;
   1152	int			error;
   1153
   1154retry:
   1155	error = xfs_trans_alloc(mp, resv, dblocks, 0, 0, &tp);
   1156	if (error)
   1157		return error;
   1158
   1159	error = xfs_trans_reserve_quota_icreate(tp, udqp, gdqp, pdqp, dblocks);
   1160	if ((error == -EDQUOT || error == -ENOSPC) && !retried) {
   1161		xfs_trans_cancel(tp);
   1162		xfs_blockgc_free_dquots(mp, udqp, gdqp, pdqp, 0);
   1163		retried = true;
   1164		goto retry;
   1165	}
   1166	if (error) {
   1167		xfs_trans_cancel(tp);
   1168		return error;
   1169	}
   1170
   1171	*tpp = tp;
   1172	return 0;
   1173}
   1174
   1175/*
    1176 * Allocate a transaction, lock and join the inode to it, and reserve quota
   1177 * in preparation for inode attribute changes that include uid, gid, or prid
   1178 * changes.
   1179 *
   1180 * The caller must ensure that the on-disk dquots attached to this inode have
   1181 * already been allocated and initialized.  The ILOCK will be dropped when the
   1182 * transaction is committed or cancelled.
   1183 */
   1184int
   1185xfs_trans_alloc_ichange(
   1186	struct xfs_inode	*ip,
   1187	struct xfs_dquot	*new_udqp,
   1188	struct xfs_dquot	*new_gdqp,
   1189	struct xfs_dquot	*new_pdqp,
   1190	bool			force,
   1191	struct xfs_trans	**tpp)
   1192{
   1193	struct xfs_trans	*tp;
   1194	struct xfs_mount	*mp = ip->i_mount;
   1195	struct xfs_dquot	*udqp;
   1196	struct xfs_dquot	*gdqp;
   1197	struct xfs_dquot	*pdqp;
   1198	bool			retried = false;
   1199	int			error;
   1200
   1201retry:
   1202	error = xfs_trans_alloc(mp, &M_RES(mp)->tr_ichange, 0, 0, 0, &tp);
   1203	if (error)
   1204		return error;
   1205
   1206	xfs_ilock(ip, XFS_ILOCK_EXCL);
   1207	xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
   1208
   1209	error = xfs_qm_dqattach_locked(ip, false);
   1210	if (error) {
   1211		/* Caller should have allocated the dquots! */
   1212		ASSERT(error != -ENOENT);
   1213		goto out_cancel;
   1214	}
   1215
   1216	/*
   1217	 * For each quota type, skip quota reservations if the inode's dquots
   1218	 * now match the ones that came from the caller, or the caller didn't
   1219	 * pass one in.  The inode's dquots can change if we drop the ILOCK to
   1220	 * perform a blockgc scan, so we must preserve the caller's arguments.
   1221	 */
   1222	udqp = (new_udqp != ip->i_udquot) ? new_udqp : NULL;
   1223	gdqp = (new_gdqp != ip->i_gdquot) ? new_gdqp : NULL;
   1224	pdqp = (new_pdqp != ip->i_pdquot) ? new_pdqp : NULL;
   1225	if (udqp || gdqp || pdqp) {
   1226		unsigned int	qflags = XFS_QMOPT_RES_REGBLKS;
   1227
   1228		if (force)
   1229			qflags |= XFS_QMOPT_FORCE_RES;
   1230
   1231		/*
   1232		 * Reserve enough quota to handle blocks on disk and reserved
   1233		 * for a delayed allocation.  We'll actually transfer the
   1234		 * delalloc reservation between dquots at chown time, even
   1235		 * though that part is only semi-transactional.
   1236		 */
   1237		error = xfs_trans_reserve_quota_bydquots(tp, mp, udqp, gdqp,
   1238				pdqp, ip->i_nblocks + ip->i_delayed_blks,
   1239				1, qflags);
   1240		if ((error == -EDQUOT || error == -ENOSPC) && !retried) {
   1241			xfs_trans_cancel(tp);
   1242			xfs_blockgc_free_dquots(mp, udqp, gdqp, pdqp, 0);
   1243			retried = true;
   1244			goto retry;
   1245		}
   1246		if (error)
   1247			goto out_cancel;
   1248	}
   1249
   1250	*tpp = tp;
   1251	return 0;
   1252
   1253out_cancel:
   1254	xfs_trans_cancel(tp);
   1255	return error;
   1256}
   1257
   1258/*
    1259 * Allocate a transaction, lock and join the directory and child inodes to it,
   1260 * and reserve quota for a directory update.  If there isn't sufficient space,
   1261 * @dblocks will be set to zero for a reservationless directory update and
   1262 * @nospace_error will be set to a negative errno describing the space
   1263 * constraint we hit.
   1264 *
   1265 * The caller must ensure that the on-disk dquots attached to this inode have
   1266 * already been allocated and initialized.  The ILOCKs will be dropped when the
   1267 * transaction is committed or cancelled.
   1268 */
   1269int
   1270xfs_trans_alloc_dir(
   1271	struct xfs_inode	*dp,
   1272	struct xfs_trans_res	*resv,
   1273	struct xfs_inode	*ip,
   1274	unsigned int		*dblocks,
   1275	struct xfs_trans	**tpp,
   1276	int			*nospace_error)
   1277{
   1278	struct xfs_trans	*tp;
   1279	struct xfs_mount	*mp = ip->i_mount;
   1280	unsigned int		resblks;
   1281	bool			retried = false;
   1282	int			error;
   1283
   1284retry:
   1285	*nospace_error = 0;
   1286	resblks = *dblocks;
   1287	error = xfs_trans_alloc(mp, resv, resblks, 0, 0, &tp);
   1288	if (error == -ENOSPC) {
   1289		*nospace_error = error;
   1290		resblks = 0;
   1291		error = xfs_trans_alloc(mp, resv, resblks, 0, 0, &tp);
   1292	}
   1293	if (error)
   1294		return error;
   1295
   1296	xfs_lock_two_inodes(dp, XFS_ILOCK_EXCL, ip, XFS_ILOCK_EXCL);
   1297
   1298	xfs_trans_ijoin(tp, dp, XFS_ILOCK_EXCL);
   1299	xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
   1300
   1301	error = xfs_qm_dqattach_locked(dp, false);
   1302	if (error) {
   1303		/* Caller should have allocated the dquots! */
   1304		ASSERT(error != -ENOENT);
   1305		goto out_cancel;
   1306	}
   1307
   1308	error = xfs_qm_dqattach_locked(ip, false);
   1309	if (error) {
   1310		/* Caller should have allocated the dquots! */
   1311		ASSERT(error != -ENOENT);
   1312		goto out_cancel;
   1313	}
   1314
   1315	if (resblks == 0)
   1316		goto done;
   1317
   1318	error = xfs_trans_reserve_quota_nblks(tp, dp, resblks, 0, false);
   1319	if (error == -EDQUOT || error == -ENOSPC) {
   1320		if (!retried) {
   1321			xfs_trans_cancel(tp);
   1322			xfs_blockgc_free_quota(dp, 0);
   1323			retried = true;
   1324			goto retry;
   1325		}
   1326
   1327		*nospace_error = error;
   1328		resblks = 0;
   1329		error = 0;
   1330	}
   1331	if (error)
   1332		goto out_cancel;
   1333
   1334done:
   1335	*tpp = tp;
   1336	*dblocks = resblks;
   1337	return 0;
   1338
   1339out_cancel:
   1340	xfs_trans_cancel(tp);
   1341	return error;
   1342}