xfs_trans_resv.c - cachepc-linux - Fork of AMDESE/linux with modifications for CachePC side-channel attack

	cachepc-linux Fork of AMDESE/linux with modifications for CachePC side-channel attack
	git clone https://git.sinitax.com/sinitax/cachepc-linux
	Log \| Files \| Refs \| README \| LICENSE \| sfeed.txt
xfs_trans_resv.c (33790B)
      1// SPDX-License-Identifier: GPL-2.0
      2/*
      3 * Copyright (c) 2000-2003,2005 Silicon Graphics, Inc.
      4 * Copyright (C) 2010 Red Hat, Inc.
      5 * All Rights Reserved.
      6 */
      7#include "xfs.h"
      8#include "xfs_fs.h"
      9#include "xfs_shared.h"
     10#include "xfs_format.h"
     11#include "xfs_log_format.h"
     12#include "xfs_trans_resv.h"
     13#include "xfs_mount.h"
     14#include "xfs_da_format.h"
     15#include "xfs_da_btree.h"
     16#include "xfs_inode.h"
     17#include "xfs_bmap_btree.h"
     18#include "xfs_quota.h"
     19#include "xfs_trans.h"
     20#include "xfs_qm.h"
     21#include "xfs_trans_space.h"
     22
     /*
      * Readable names for the boolean "alloc" argument of
      * xfs_calc_inode_chunk_res(): _ALLOC selects the inode chunk allocation
      * case, _FREE the inode chunk free case.
      */
     23#define _ALLOC	true
     24#define _FREE	false
     25
     26/*
     27 * A buffer has a format structure overhead in the log in addition
     28 * to the data, so we need to take this into account when reserving
     29 * space in a transaction for a buffer.  Round the space required up
     30 * to a multiple of 128 bytes so that we don't change the historical
     31 * reservation that has been used for this overhead.
     32 */
     33STATIC uint
     34xfs_buf_log_overhead(void)
     35{
     36	return round_up(sizeof(struct xlog_op_header) +
     37			sizeof(struct xfs_buf_log_format), 128);
     38}
     39
     40/*
     41 * Calculate out transaction log reservation per item in bytes.
     42 *
     43 * The nbufs argument is used to indicate the number of items that
     44 * will be changed in a transaction.  size is used to tell how many
     45 * bytes should be reserved per item.
     46 */
     47STATIC uint
     48xfs_calc_buf_res(
     49	uint		nbufs,
     50	uint		size)
     51{
     52	return nbufs * (size + xfs_buf_log_overhead());
     53}
     54
     55/*
     56 * Per-extent log reservation for the btree changes involved in freeing or
     57 * allocating an extent.  In classic XFS there were two trees that will be
     58 * modified (bnobt + cntbt).  With rmap enabled, there are three trees
     59 * (rmapbt).  The number of blocks reserved is based on the formula:
     60 *
     61 * num trees * ((2 blocks/level * max depth) - 1)
     62 *
     63 * Keep in mind that max depth is calculated separately for each type of tree.
     64 */
     65uint
     66xfs_allocfree_block_count(
     67	struct xfs_mount *mp,
     68	uint		num_ops)
     69{
     70	uint		blocks;
     71
     72	blocks = num_ops * 2 * (2 * mp->m_alloc_maxlevels - 1);
     73	if (xfs_has_rmapbt(mp))
     74		blocks += num_ops * (2 * mp->m_rmap_maxlevels - 1);
     75
     76	return blocks;
     77}
     78
     79/*
     80 * Per-extent log reservation for refcount btree changes.  These are never done
     81 * in the same transaction as an allocation or a free, so we compute them
     82 * separately.
     83 */
     84static unsigned int
     85xfs_refcountbt_block_count(
     86	struct xfs_mount	*mp,
     87	unsigned int		num_ops)
     88{
     89	return num_ops * (2 * mp->m_refc_maxlevels - 1);
     90}
     91
     92/*
     93 * Logging inodes is really tricksy. They are logged in memory format,
     94 * which means that what we write into the log doesn't directly translate into
     95 * the amount of space they use on disk.
     96 *
     97 * Case in point - btree format forks in memory format use more space than the
     98 * on-disk format. In memory, the buffer contains a normal btree block header so
     99 * the btree code can treat it as though it is just another generic buffer.
    100 * However, when we write it to the inode fork, we don't write all of this
    101 * header as it isn't needed. e.g. the root is only ever in the inode, so
    102 * there's no need for sibling pointers which would waste 16 bytes of space.
    103 *
    104 * Hence when we have an inode with a maximally sized btree format fork, then
    105 * amount of information we actually log is greater than the size of the inode
    106 * on disk. Hence we need an inode reservation function that calculates all this
    107 * correctly. So, we log:
    108 *
    109 * - 4 log op headers for object
    110 *	- for the ilf, the inode core and 2 forks
    111 * - inode log format object
    112 * - the inode core
    113 * - two inode forks containing bmap btree root blocks.
    114 *	- the btree data contained by both forks will fit into the inode size,
    115 *	  hence when combined with the inode core above, we have a total of the
    116 *	  actual inode size.
    117 *	- the BMBT headers need to be accounted separately, as they are
    118 *	  additional to the records and pointers that fit inside the inode
    119 *	  forks.
    120 */
    121STATIC uint
    122xfs_calc_inode_res(
    123	struct xfs_mount	*mp,
    124	uint			ninodes)
    125{
    126	return ninodes *
    127		(4 * sizeof(struct xlog_op_header) +
    128		 sizeof(struct xfs_inode_log_format) +
    129		 mp->m_sb.sb_inodesize +
    130		 2 * XFS_BMBT_BLOCK_LEN(mp));
    131}
    132
    133/*
    134 * Inode btree record insertion/removal modifies the inode btree and free space
    135 * btrees (since the inobt does not use the agfl). This requires the following
    136 * reservation:
    137 *
    138 * the inode btree: max depth * blocksize
    139 * the allocation btrees: 2 trees * (max depth - 1) * block size
    140 *
    141 * The caller must account for SB and AG header modifications, etc.
    142 */
    143STATIC uint
    144xfs_calc_inobt_res(
    145	struct xfs_mount	*mp)
    146{
    147	return xfs_calc_buf_res(M_IGEO(mp)->inobt_maxlevels,
    148			XFS_FSB_TO_B(mp, 1)) +
    149				xfs_calc_buf_res(xfs_allocfree_block_count(mp, 1),
    150			XFS_FSB_TO_B(mp, 1));
    151}
    152
    153/*
    154 * The free inode btree is a conditional feature. The behavior differs slightly
    155 * from that of the traditional inode btree in that the finobt tracks records
    156 * for inode chunks with at least one free inode. A record can be removed from
    157 * the tree during individual inode allocation. Therefore the finobt
    158 * reservation is unconditional for both the inode chunk allocation and
    159 * individual inode allocation (modify) cases.
    160 *
    161 * Behavior aside, the reservation for finobt modification is equivalent to the
    162 * traditional inobt: cover a full finobt shape change plus block allocation.
    163 */
    164STATIC uint
    165xfs_calc_finobt_res(
    166	struct xfs_mount	*mp)
    167{
    168	if (!xfs_has_finobt(mp))
    169		return 0;
    170
    171	return xfs_calc_inobt_res(mp);
    172}
    173
    174/*
    175 * Calculate the reservation required to allocate or free an inode chunk. This
    176 * includes:
    177 *
    178 * the allocation btrees: 2 trees * (max depth - 1) * block size
    179 * the inode chunk: m_ino_geo.ialloc_blks * N
    180 *
    181 * The size N of the inode chunk reservation depends on whether it is for
    182 * allocation or free and which type of create transaction is in use. An inode
    183 * chunk free always invalidates the buffers and only requires reservation for
    184 * headers (N == 0). An inode chunk allocation requires a chunk sized
    185 * reservation on v4 and older superblocks to initialize the chunk. No chunk
    186 * reservation is required for allocation on v5 supers, which use ordered
    187 * buffers to initialize.
    188 */
    189STATIC uint
    190xfs_calc_inode_chunk_res(
    191	struct xfs_mount	*mp,
    192	bool			alloc)
    193{
    194	uint			res, size = 0;
    195
    196	res = xfs_calc_buf_res(xfs_allocfree_block_count(mp, 1),
    197			       XFS_FSB_TO_B(mp, 1));
    198	if (alloc) {
    199		/* icreate tx uses ordered buffers */
    200		if (xfs_has_v3inodes(mp))
    201			return res;
    202		size = XFS_FSB_TO_B(mp, 1);
    203	}
    204
    205	res += xfs_calc_buf_res(M_IGEO(mp)->ialloc_blks, size);
    206	return res;
    207}
    208
    209/*
    210 * Per-extent log reservation for the btree changes involved in freeing or
    211 * allocating a realtime extent.  We have to be able to log as many rtbitmap
    212 * blocks as needed to mark inuse XFS_BMBT_MAX_EXTLEN blocks' worth of realtime
    213 * extents, as well as the realtime summary block.
    214 */
    215static unsigned int
    216xfs_rtalloc_block_count(
    217	struct xfs_mount	*mp,
    218	unsigned int		num_ops)
    219{
    220	unsigned int		blksz = XFS_FSB_TO_B(mp, 1);
    221	unsigned int		rtbmp_bytes;
    222
    223	rtbmp_bytes = (XFS_MAX_BMBT_EXTLEN / mp->m_sb.sb_rextsize) / NBBY;
    224	return (howmany(rtbmp_bytes, blksz) + 1) * num_ops;
    225}
    226
    227/*
    228 * Various log reservation values.
    229 *
    230 * These are based on the size of the file system block because that is what
    231 * most transactions manipulate.  Each adds in an additional 128 bytes per
    232 * item logged to try to account for the overhead of the transaction mechanism.
    233 *
    234 * Note:  Most of the reservations underestimate the number of allocation
    235 * groups into which they could free extents in the xfs_defer_finish() call.
    236 * This is because the number in the worst case is quite high and quite
    237 * unusual.  In order to fix this we need to change xfs_defer_finish() to free
    238 * extents in only a single AG at a time.  This will require changes to the
    239 * EFI code as well, however, so that the EFI for the extents not freed is
    240 * logged again in each transaction.  See SGI PV #261917.
    241 *
    242 * Reservation functions here avoid a huge stack in xfs_trans_init due to
    243 * register overflow from temporaries in the calculations.
    244 */
    245
    246/*
    247 * Compute the log reservation required to handle the refcount update
    248 * transaction.  Refcount updates are always done via deferred log items.
    249 *
    250 * This is calculated as:
    251 * Data device refcount updates (t1):
    252 *    the agfs of the ags containing the blocks: nr_ops * sector size
    253 *    the refcount btrees: nr_ops * 1 trees * (2 * max depth - 1) * block size
    254 */
    255static unsigned int
    256xfs_calc_refcountbt_reservation(
    257	struct xfs_mount	*mp,
    258	unsigned int		nr_ops)
    259{
    260	unsigned int		blksz = XFS_FSB_TO_B(mp, 1);
    261
    262	if (!xfs_has_reflink(mp))
    263		return 0;
    264
    265	return xfs_calc_buf_res(nr_ops, mp->m_sb.sb_sectsize) +
    266	       xfs_calc_buf_res(xfs_refcountbt_block_count(mp, nr_ops), blksz);
    267}
    268
    269/*
    270 * In a write transaction we can allocate a maximum of 2
    271 * extents.  This gives (t1):
    272 *    the inode getting the new extents: inode size
    273 *    the inode's bmap btree: max depth * block size
    274 *    the agfs of the ags from which the extents are allocated: 2 * sector
    275 *    the superblock free block counter: sector size
    276 *    the allocation btrees: 2 exts * 2 trees * (2 * max depth - 1) * block size
    277 * Or, if we're writing to a realtime file (t2):
    278 *    the inode getting the new extents: inode size
    279 *    the inode's bmap btree: max depth * block size
    280 *    the agfs of the ags from which the extents are allocated: 2 * sector
    281 *    the superblock free block counter: sector size
    282 *    the realtime bitmap: ((XFS_BMBT_MAX_EXTLEN / rtextsize) / NBBY) bytes
    283 *    the realtime summary: 1 block
    284 *    the allocation btrees: 2 trees * (2 * max depth - 1) * block size
    285 * And the bmap_finish transaction can free bmap blocks in a join (t3):
    286 *    the agfs of the ags containing the blocks: 2 * sector size
    287 *    the agfls of the ags containing the blocks: 2 * sector size
    288 *    the super block free block counter: sector size
    289 *    the allocation btrees: 2 exts * 2 trees * (2 * max depth - 1) * block size
    290 * And any refcount updates that happen in a separate transaction (t4).
    291 */
    292STATIC uint
    293xfs_calc_write_reservation(
    294	struct xfs_mount	*mp,
    295	bool			for_minlogsize)
    296{
    297	unsigned int		t1, t2, t3, t4;
    298	unsigned int		blksz = XFS_FSB_TO_B(mp, 1);
    299
    300	t1 = xfs_calc_inode_res(mp, 1) +
    301	     xfs_calc_buf_res(XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK), blksz) +
    302	     xfs_calc_buf_res(3, mp->m_sb.sb_sectsize) +
    303	     xfs_calc_buf_res(xfs_allocfree_block_count(mp, 2), blksz);
    304
    305	if (xfs_has_realtime(mp)) {
    306		t2 = xfs_calc_inode_res(mp, 1) +
    307		     xfs_calc_buf_res(XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK),
    308				     blksz) +
    309		     xfs_calc_buf_res(3, mp->m_sb.sb_sectsize) +
    310		     xfs_calc_buf_res(xfs_rtalloc_block_count(mp, 1), blksz) +
    311		     xfs_calc_buf_res(xfs_allocfree_block_count(mp, 1), blksz);
    312	} else {
    313		t2 = 0;
    314	}
    315
    316	t3 = xfs_calc_buf_res(5, mp->m_sb.sb_sectsize) +
    317	     xfs_calc_buf_res(xfs_allocfree_block_count(mp, 2), blksz);
    318
    319	/*
    320	 * In the early days of reflink, we included enough reservation to log
    321	 * two refcountbt splits for each transaction.  The codebase runs
    322	 * refcountbt updates in separate transactions now, so to compute the
    323	 * minimum log size, add the refcountbtree splits back to t1 and t3 and
    324	 * do not account them separately as t4.  Reflink did not support
    325	 * realtime when the reservations were established, so no adjustment to
    326	 * t2 is needed.
    327	 */
    328	if (for_minlogsize) {
    329		unsigned int	adj = 0;
    330
    331		if (xfs_has_reflink(mp))
    332			adj = xfs_calc_buf_res(
    333					xfs_refcountbt_block_count(mp, 2),
    334					blksz);
    335		t1 += adj;
    336		t3 += adj;
    337		return XFS_DQUOT_LOGRES(mp) + max3(t1, t2, t3);
    338	}
    339
    340	t4 = xfs_calc_refcountbt_reservation(mp, 1);
    341	return XFS_DQUOT_LOGRES(mp) + max(t4, max3(t1, t2, t3));
    342}
    343
    344unsigned int
    345xfs_calc_write_reservation_minlogsize(
    346	struct xfs_mount	*mp)
    347{
    348	return xfs_calc_write_reservation(mp, true);
    349}
    350
    351/*
    352 * In truncating a file we free up to two extents at once.  We can modify (t1):
    353 *    the inode being truncated: inode size
    354 *    the inode's bmap btree: (max depth + 1) * block size
    355 * And the bmap_finish transaction can free the blocks and bmap blocks (t2):
    356 *    the agf for each of the ags: 4 * sector size
    357 *    the agfl for each of the ags: 4 * sector size
    358 *    the super block to reflect the freed blocks: sector size
    359 *    worst case split in allocation btrees per extent assuming 4 extents:
    360 *		4 exts * 2 trees * (2 * max depth - 1) * block size
    361 * Or, if it's a realtime file (t3):
    362 *    the agf for each of the ags: 2 * sector size
    363 *    the agfl for each of the ags: 2 * sector size
    364 *    the super block to reflect the freed blocks: sector size
    365 *    the realtime bitmap:
    366 *		2 exts * ((XFS_BMBT_MAX_EXTLEN / rtextsize) / NBBY) bytes
    367 *    the realtime summary: 2 exts * 1 block
    368 *    worst case split in allocation btrees per extent assuming 2 extents:
    369 *		2 exts * 2 trees * (2 * max depth - 1) * block size
    370 * And any refcount updates that happen in a separate transaction (t4).
    371 */
    372STATIC uint
    373xfs_calc_itruncate_reservation(
    374	struct xfs_mount	*mp,
    375	bool			for_minlogsize)
    376{
    377	unsigned int		t1, t2, t3, t4;
    378	unsigned int		blksz = XFS_FSB_TO_B(mp, 1);
    379
    380	t1 = xfs_calc_inode_res(mp, 1) +
    381	     xfs_calc_buf_res(XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK) + 1, blksz);
    382
    383	t2 = xfs_calc_buf_res(9, mp->m_sb.sb_sectsize) +
    384	     xfs_calc_buf_res(xfs_allocfree_block_count(mp, 4), blksz);
    385
    386	if (xfs_has_realtime(mp)) {
    387		t3 = xfs_calc_buf_res(5, mp->m_sb.sb_sectsize) +
    388		     xfs_calc_buf_res(xfs_rtalloc_block_count(mp, 2), blksz) +
    389		     xfs_calc_buf_res(xfs_allocfree_block_count(mp, 2), blksz);
    390	} else {
    391		t3 = 0;
    392	}
    393
    394	/*
    395	 * In the early days of reflink, we included enough reservation to log
    396	 * four refcountbt splits in the same transaction as bnobt/cntbt
    397	 * updates.  The codebase runs refcountbt updates in separate
    398	 * transactions now, so to compute the minimum log size, add the
    399	 * refcount btree splits back here and do not compute them separately
    400	 * as t4.  Reflink did not support realtime when the reservations were
    401	 * established, so do not adjust t3.
    402	 */
    403	if (for_minlogsize) {
    404		if (xfs_has_reflink(mp))
    405			t2 += xfs_calc_buf_res(
    406					xfs_refcountbt_block_count(mp, 4),
    407					blksz);
    408
    409		return XFS_DQUOT_LOGRES(mp) + max3(t1, t2, t3);
    410	}
    411
    412	t4 = xfs_calc_refcountbt_reservation(mp, 2);
    413	return XFS_DQUOT_LOGRES(mp) + max(t4, max3(t1, t2, t3));
    414}
    415
    416unsigned int
    417xfs_calc_itruncate_reservation_minlogsize(
    418	struct xfs_mount	*mp)
    419{
    420	return xfs_calc_itruncate_reservation(mp, true);
    421}
    422
    423/*
    424 * In renaming a files we can modify:
    425 *    the four inodes involved: 4 * inode size
    426 *    the two directory btrees: 2 * (max depth + v2) * dir block size
    427 *    the two directory bmap btrees: 2 * max depth * block size
    428 * And the bmap_finish transaction can free dir and bmap blocks (two sets
    429 *	of bmap blocks) giving:
    430 *    the agf for the ags in which the blocks live: 3 * sector size
    431 *    the agfl for the ags in which the blocks live: 3 * sector size
    432 *    the superblock for the free block count: sector size
    433 *    the allocation btrees: 3 exts * 2 trees * (2 * max depth - 1) * block size
    434 */
    435STATIC uint
    436xfs_calc_rename_reservation(
    437	struct xfs_mount	*mp)
    438{
    439	return XFS_DQUOT_LOGRES(mp) +
    440		max((xfs_calc_inode_res(mp, 4) +
    441		     xfs_calc_buf_res(2 * XFS_DIROP_LOG_COUNT(mp),
    442				      XFS_FSB_TO_B(mp, 1))),
    443		    (xfs_calc_buf_res(7, mp->m_sb.sb_sectsize) +
    444		     xfs_calc_buf_res(xfs_allocfree_block_count(mp, 3),
    445				      XFS_FSB_TO_B(mp, 1))));
    446}
    447
    448/*
    449 * For removing an inode from unlinked list at first, we can modify:
    450 *    the agi hash list and counters: sector size
    451 *    the on disk inode before ours in the agi hash list: inode cluster size
    452 *    the on disk inode in the agi hash list: inode cluster size
    453 */
    454STATIC uint
    455xfs_calc_iunlink_remove_reservation(
    456	struct xfs_mount        *mp)
    457{
    458	return xfs_calc_buf_res(1, mp->m_sb.sb_sectsize) +
    459	       2 * M_IGEO(mp)->inode_cluster_size;
    460}
    461
    462/*
    463 * For creating a link to an inode:
    464 *    the parent directory inode: inode size
    465 *    the linked inode: inode size
    466 *    the directory btree could split: (max depth + v2) * dir block size
    467 *    the directory bmap btree could join or split: (max depth + v2) * blocksize
    468 * And the bmap_finish transaction can free some bmap blocks giving:
    469 *    the agf for the ag in which the blocks live: sector size
    470 *    the agfl for the ag in which the blocks live: sector size
    471 *    the superblock for the free block count: sector size
    472 *    the allocation btrees: 2 trees * (2 * max depth - 1) * block size
    473 */
    474STATIC uint
    475xfs_calc_link_reservation(
    476	struct xfs_mount	*mp)
    477{
    478	return XFS_DQUOT_LOGRES(mp) +
    479		xfs_calc_iunlink_remove_reservation(mp) +
    480		max((xfs_calc_inode_res(mp, 2) +
    481		     xfs_calc_buf_res(XFS_DIROP_LOG_COUNT(mp),
    482				      XFS_FSB_TO_B(mp, 1))),
    483		    (xfs_calc_buf_res(3, mp->m_sb.sb_sectsize) +
    484		     xfs_calc_buf_res(xfs_allocfree_block_count(mp, 1),
    485				      XFS_FSB_TO_B(mp, 1))));
    486}
    487
    488/*
    489 * For adding an inode to unlinked list we can modify:
    490 *    the agi hash list: sector size
    491 *    the on disk inode: inode cluster size
    492 */
    493STATIC uint
    494xfs_calc_iunlink_add_reservation(xfs_mount_t *mp)
    495{
    496	return xfs_calc_buf_res(1, mp->m_sb.sb_sectsize) +
    497			M_IGEO(mp)->inode_cluster_size;
    498}
    499
    500/*
    501 * For removing a directory entry we can modify:
    502 *    the parent directory inode: inode size
    503 *    the removed inode: inode size
    504 *    the directory btree could join: (max depth + v2) * dir block size
    505 *    the directory bmap btree could join or split: (max depth + v2) * blocksize
    506 * And the bmap_finish transaction can free the dir and bmap blocks giving:
    507 *    the agf for the ag in which the blocks live: 2 * sector size
    508 *    the agfl for the ag in which the blocks live: 2 * sector size
    509 *    the superblock for the free block count: sector size
    510 *    the allocation btrees: 2 exts * 2 trees * (2 * max depth - 1) * block size
    511 */
    512STATIC uint
    513xfs_calc_remove_reservation(
    514	struct xfs_mount	*mp)
    515{
    516	return XFS_DQUOT_LOGRES(mp) +
    517		xfs_calc_iunlink_add_reservation(mp) +
    518		max((xfs_calc_inode_res(mp, 1) +
    519		     xfs_calc_buf_res(XFS_DIROP_LOG_COUNT(mp),
    520				      XFS_FSB_TO_B(mp, 1))),
    521		    (xfs_calc_buf_res(4, mp->m_sb.sb_sectsize) +
    522		     xfs_calc_buf_res(xfs_allocfree_block_count(mp, 2),
    523				      XFS_FSB_TO_B(mp, 1))));
    524}
    525
    526/*
    527 * For create, break it into the two cases that the transaction
    528 * covers. We start with the modify case - allocation done by modification
    529 * of the state of existing inodes - and then the allocation case.
    530 */
    531
    532/*
    533 * For create we can modify:
    534 *    the parent directory inode: inode size
    535 *    the new inode: inode size
    536 *    the inode btree entry: block size
    537 *    the superblock for the nlink flag: sector size
    538 *    the directory btree: (max depth + v2) * dir block size
    539 *    the directory inode's bmap btree: (max depth + v2) * block size
    540 *    the finobt (record modification and allocation btrees)
    541 */
    542STATIC uint
    543xfs_calc_create_resv_modify(
    544	struct xfs_mount	*mp)
    545{
    546	return xfs_calc_inode_res(mp, 2) +
    547		xfs_calc_buf_res(1, mp->m_sb.sb_sectsize) +
    548		(uint)XFS_FSB_TO_B(mp, 1) +
    549		xfs_calc_buf_res(XFS_DIROP_LOG_COUNT(mp), XFS_FSB_TO_B(mp, 1)) +
    550		xfs_calc_finobt_res(mp);
    551}
    552
    553/*
    554 * For icreate we can allocate some inodes giving:
    555 *    the agi and agf of the ag getting the new inodes: 2 * sectorsize
    556 *    the superblock for the nlink flag: sector size
    557 *    the inode chunk (allocation, optional init)
    558 *    the inobt (record insertion)
    559 *    the finobt (optional, record insertion)
    560 */
    561STATIC uint
    562xfs_calc_icreate_resv_alloc(
    563	struct xfs_mount	*mp)
    564{
    565	return xfs_calc_buf_res(2, mp->m_sb.sb_sectsize) +
    566		mp->m_sb.sb_sectsize +
    567		xfs_calc_inode_chunk_res(mp, _ALLOC) +
    568		xfs_calc_inobt_res(mp) +
    569		xfs_calc_finobt_res(mp);
    570}
    571
    572STATIC uint
    573xfs_calc_icreate_reservation(xfs_mount_t *mp)
    574{
    575	return XFS_DQUOT_LOGRES(mp) +
    576		max(xfs_calc_icreate_resv_alloc(mp),
    577		    xfs_calc_create_resv_modify(mp));
    578}
    579
    580STATIC uint
    581xfs_calc_create_tmpfile_reservation(
    582	struct xfs_mount        *mp)
    583{
    584	uint	res = XFS_DQUOT_LOGRES(mp);
    585
    586	res += xfs_calc_icreate_resv_alloc(mp);
    587	return res + xfs_calc_iunlink_add_reservation(mp);
    588}
    589
    590/*
    591 * Making a new directory is the same as creating a new file.
    592 */
    593STATIC uint
    594xfs_calc_mkdir_reservation(
    595	struct xfs_mount	*mp)
    596{
    597	return xfs_calc_icreate_reservation(mp);
    598}
    599
    600
    601/*
    602 * Making a new symplink is the same as creating a new file, but
    603 * with the added blocks for remote symlink data which can be up to 1kB in
    604 * length (XFS_SYMLINK_MAXLEN).
    605 */
    606STATIC uint
    607xfs_calc_symlink_reservation(
    608	struct xfs_mount	*mp)
    609{
    610	return xfs_calc_icreate_reservation(mp) +
    611	       xfs_calc_buf_res(1, XFS_SYMLINK_MAXLEN);
    612}
    613
    614/*
    615 * In freeing an inode we can modify:
    616 *    the inode being freed: inode size
    617 *    the super block free inode counter, AGF and AGFL: sector size
    618 *    the on disk inode (agi unlinked list removal)
    619 *    the inode chunk (invalidated, headers only)
    620 *    the inode btree
    621 *    the finobt (record insertion, removal or modification)
    622 *
    623 * Note that the inode chunk res. includes an allocfree res. for freeing of the
    624 * inode chunk. This is technically extraneous because the inode chunk free is
    625 * deferred (it occurs after a transaction roll). Include the extra reservation
    626 * anyways since we've had reports of ifree transaction overruns due to too many
    627 * agfl fixups during inode chunk frees.
    628 */
    629STATIC uint
    630xfs_calc_ifree_reservation(
    631	struct xfs_mount	*mp)
    632{
    633	return XFS_DQUOT_LOGRES(mp) +
    634		xfs_calc_inode_res(mp, 1) +
    635		xfs_calc_buf_res(3, mp->m_sb.sb_sectsize) +
    636		xfs_calc_iunlink_remove_reservation(mp) +
    637		xfs_calc_inode_chunk_res(mp, _FREE) +
    638		xfs_calc_inobt_res(mp) +
    639		xfs_calc_finobt_res(mp);
    640}
    641
    642/*
    643 * When only changing the inode we log the inode and possibly the superblock
    644 * We also add a bit of slop for the transaction stuff.
    645 */
    646STATIC uint
    647xfs_calc_ichange_reservation(
    648	struct xfs_mount	*mp)
    649{
    650	return XFS_DQUOT_LOGRES(mp) +
    651		xfs_calc_inode_res(mp, 1) +
    652		xfs_calc_buf_res(1, mp->m_sb.sb_sectsize);
    653
    654}
    655
    656/*
    657 * Growing the data section of the filesystem.
    658 *	superblock
    659 *	agi and agf
    660 *	allocation btrees
    661 */
    662STATIC uint
    663xfs_calc_growdata_reservation(
    664	struct xfs_mount	*mp)
    665{
    666	return xfs_calc_buf_res(3, mp->m_sb.sb_sectsize) +
    667		xfs_calc_buf_res(xfs_allocfree_block_count(mp, 1),
    668				 XFS_FSB_TO_B(mp, 1));
    669}
    670
    671/*
    672 * Growing the rt section of the filesystem.
    673 * In the first set of transactions (ALLOC) we allocate space to the
    674 * bitmap or summary files.
    675 *	superblock: sector size
    676 *	agf of the ag from which the extent is allocated: sector size
    677 *	bmap btree for bitmap/summary inode: max depth * blocksize
    678 *	bitmap/summary inode: inode size
    679 *	allocation btrees for 1 block alloc: 2 * (2 * maxdepth - 1) * blocksize
    680 */
    681STATIC uint
    682xfs_calc_growrtalloc_reservation(
    683	struct xfs_mount	*mp)
    684{
    685	return xfs_calc_buf_res(2, mp->m_sb.sb_sectsize) +
    686		xfs_calc_buf_res(XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK),
    687				 XFS_FSB_TO_B(mp, 1)) +
    688		xfs_calc_inode_res(mp, 1) +
    689		xfs_calc_buf_res(xfs_allocfree_block_count(mp, 1),
    690				 XFS_FSB_TO_B(mp, 1));
    691}
    692
    693/*
    694 * Growing the rt section of the filesystem.
    695 * In the second set of transactions (ZERO) we zero the new metadata blocks.
    696 *	one bitmap/summary block: blocksize
    697 */
    698STATIC uint
    699xfs_calc_growrtzero_reservation(
    700	struct xfs_mount	*mp)
    701{
    702	return xfs_calc_buf_res(1, mp->m_sb.sb_blocksize);
    703}
    704
    705/*
    706 * Growing the rt section of the filesystem.
    707 * In the third set of transactions (FREE) we update metadata without
    708 * allocating any new blocks.
    709 *	superblock: sector size
    710 *	bitmap inode: inode size
    711 *	summary inode: inode size
    712 *	one bitmap block: blocksize
    713 *	summary blocks: new summary size
    714 */
    715STATIC uint
    716xfs_calc_growrtfree_reservation(
    717	struct xfs_mount	*mp)
    718{
    719	return xfs_calc_buf_res(1, mp->m_sb.sb_sectsize) +
    720		xfs_calc_inode_res(mp, 2) +
    721		xfs_calc_buf_res(1, mp->m_sb.sb_blocksize) +
    722		xfs_calc_buf_res(1, mp->m_rsumsize);
    723}
    724
    725/*
    726 * Logging the inode modification timestamp on a synchronous write.
    727 *	inode
    728 */
    729STATIC uint
    730xfs_calc_swrite_reservation(
    731	struct xfs_mount	*mp)
    732{
    733	return xfs_calc_inode_res(mp, 1);
    734}
    735
    736/*
    737 * Logging the inode mode bits when writing a setuid/setgid file
    738 *	inode
    739 */
    740STATIC uint
    741xfs_calc_writeid_reservation(
    742	struct xfs_mount	*mp)
    743{
    744	return xfs_calc_inode_res(mp, 1);
    745}
    746
    747/*
    748 * Converting the inode from non-attributed to attributed.
    749 *	the inode being converted: inode size
    750 *	agf block and superblock (for block allocation)
    751 *	the new block (directory sized)
    752 *	bmap blocks for the new directory block
    753 *	allocation btrees
    754 */
    755STATIC uint
    756xfs_calc_addafork_reservation(
    757	struct xfs_mount	*mp)
    758{
    759	return XFS_DQUOT_LOGRES(mp) +
    760		xfs_calc_inode_res(mp, 1) +
    761		xfs_calc_buf_res(2, mp->m_sb.sb_sectsize) +
    762		xfs_calc_buf_res(1, mp->m_dir_geo->blksize) +
    763		xfs_calc_buf_res(XFS_DAENTER_BMAP1B(mp, XFS_DATA_FORK) + 1,
    764				 XFS_FSB_TO_B(mp, 1)) +
    765		xfs_calc_buf_res(xfs_allocfree_block_count(mp, 1),
    766				 XFS_FSB_TO_B(mp, 1));
    767}
    768
    769/*
    770 * Removing the attribute fork of a file
    771 *    the inode being truncated: inode size
    772 *    the inode's bmap btree: max depth * block size
    773 * And the bmap_finish transaction can free the blocks and bmap blocks:
    774 *    the agf for each of the ags: 4 * sector size
    775 *    the agfl for each of the ags: 4 * sector size
    776 *    the super block to reflect the freed blocks: sector size
    777 *    worst case split in allocation btrees per extent assuming 4 extents:
    778 *		4 exts * 2 trees * (2 * max depth - 1) * block size
    779 */
    780STATIC uint
    781xfs_calc_attrinval_reservation(
    782	struct xfs_mount	*mp)
    783{
    784	return max((xfs_calc_inode_res(mp, 1) +
    785		    xfs_calc_buf_res(XFS_BM_MAXLEVELS(mp, XFS_ATTR_FORK),
    786				     XFS_FSB_TO_B(mp, 1))),
    787		   (xfs_calc_buf_res(9, mp->m_sb.sb_sectsize) +
    788		    xfs_calc_buf_res(xfs_allocfree_block_count(mp, 4),
    789				     XFS_FSB_TO_B(mp, 1))));
    790}
    791
    792/*
    793 * Setting an attribute at mount time.
    794 *	the inode getting the attribute
    795 *	the superblock for allocations
    796 *	the agfs extents are allocated from
    797 *	the attribute btree * max depth
    798 *	the inode allocation btree
    799 * Since attribute transaction space is dependent on the size of the attribute,
    800 * the calculation is done partially at mount time and partially at runtime(see
    801 * below).
    802 */
    803STATIC uint
    804xfs_calc_attrsetm_reservation(
    805	struct xfs_mount	*mp)
    806{
    807	return XFS_DQUOT_LOGRES(mp) +
    808		xfs_calc_inode_res(mp, 1) +
    809		xfs_calc_buf_res(1, mp->m_sb.sb_sectsize) +
    810		xfs_calc_buf_res(XFS_DA_NODE_MAXDEPTH, XFS_FSB_TO_B(mp, 1));
    811}
    812
    813/*
    814 * Setting an attribute at runtime, transaction space unit per block.
    815 * 	the superblock for allocations: sector size
    816 *	the inode bmap btree could join or split: max depth * block size
    817 * Since the runtime attribute transaction space is dependent on the total
    818 * blocks needed for the 1st bmap, here we calculate out the space unit for
    819 * one block so that the caller could figure out the total space according
    820 * to the attibute extent length in blocks by:
    821 *	ext * M_RES(mp)->tr_attrsetrt.tr_logres
    822 */
    823STATIC uint
    824xfs_calc_attrsetrt_reservation(
    825	struct xfs_mount	*mp)
    826{
    827	return xfs_calc_buf_res(1, mp->m_sb.sb_sectsize) +
    828		xfs_calc_buf_res(XFS_BM_MAXLEVELS(mp, XFS_ATTR_FORK),
    829				 XFS_FSB_TO_B(mp, 1));
    830}
    831
    832/*
    833 * Removing an attribute.
    834 *    the inode: inode size
    835 *    the attribute btree could join: max depth * block size
    836 *    the inode bmap btree could join or split: max depth * block size
    837 * And the bmap_finish transaction can free the attr blocks freed giving:
    838 *    the agf for the ag in which the blocks live: 2 * sector size
    839 *    the agfl for the ag in which the blocks live: 2 * sector size
    840 *    the superblock for the free block count: sector size
    841 *    the allocation btrees: 2 exts * 2 trees * (2 * max depth - 1) * block size
    842 */
    843STATIC uint
    844xfs_calc_attrrm_reservation(
    845	struct xfs_mount	*mp)
    846{
    847	return XFS_DQUOT_LOGRES(mp) +
    848		max((xfs_calc_inode_res(mp, 1) +
    849		     xfs_calc_buf_res(XFS_DA_NODE_MAXDEPTH,
    850				      XFS_FSB_TO_B(mp, 1)) +
    851		     (uint)XFS_FSB_TO_B(mp,
    852					XFS_BM_MAXLEVELS(mp, XFS_ATTR_FORK)) +
    853		     xfs_calc_buf_res(XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK), 0)),
    854		    (xfs_calc_buf_res(5, mp->m_sb.sb_sectsize) +
    855		     xfs_calc_buf_res(xfs_allocfree_block_count(mp, 2),
    856				      XFS_FSB_TO_B(mp, 1))));
    857}
    858
    859/*
    860 * Clearing a bad agino number in an agi hash bucket.
    861 */
    862STATIC uint
    863xfs_calc_clear_agi_bucket_reservation(
    864	struct xfs_mount	*mp)
    865{
    866	return xfs_calc_buf_res(1, mp->m_sb.sb_sectsize);
    867}
    868
    869/*
    870 * Adjusting quota limits.
    871 *    the disk quota buffer: sizeof(struct xfs_disk_dquot)
    872 */
    873STATIC uint
    874xfs_calc_qm_setqlim_reservation(void)
    875{
    876	return xfs_calc_buf_res(1, sizeof(struct xfs_disk_dquot));
    877}
    878
    879/*
    880 * Allocating quota on disk if needed.
    881 *	the write transaction log space for quota file extent allocation
    882 *	the unit of quota allocation: one system block size
    883 */
    884STATIC uint
    885xfs_calc_qm_dqalloc_reservation(
    886	struct xfs_mount	*mp,
    887	bool			for_minlogsize)
    888{
    889	return xfs_calc_write_reservation(mp, for_minlogsize) +
    890		xfs_calc_buf_res(1,
    891			XFS_FSB_TO_B(mp, XFS_DQUOT_CLUSTER_SIZE_FSB) - 1);
    892}
    893
    894unsigned int
    895xfs_calc_qm_dqalloc_reservation_minlogsize(
    896	struct xfs_mount	*mp)
    897{
    898	return xfs_calc_qm_dqalloc_reservation(mp, true);
    899}
    900
    901/*
    902 * Syncing the incore super block changes to disk.
    903 *     the super block to reflect the changes: sector size
    904 */
    905STATIC uint
    906xfs_calc_sb_reservation(
    907	struct xfs_mount	*mp)
    908{
    909	return xfs_calc_buf_res(1, mp->m_sb.sb_sectsize);
    910}
    911
    912void
    913xfs_trans_resv_calc(
    914	struct xfs_mount	*mp,
    915	struct xfs_trans_resv	*resp)
    916{
    917	int			logcount_adj = 0;
    918
    919	/*
    920	 * The following transactions are logged in physical format and
    921	 * require a permanent reservation on space.
    922	 */
    923	resp->tr_write.tr_logres = xfs_calc_write_reservation(mp, false);
    924	resp->tr_write.tr_logcount = XFS_WRITE_LOG_COUNT;
    925	resp->tr_write.tr_logflags |= XFS_TRANS_PERM_LOG_RES;
    926
    927	resp->tr_itruncate.tr_logres = xfs_calc_itruncate_reservation(mp, false);
    928	resp->tr_itruncate.tr_logcount = XFS_ITRUNCATE_LOG_COUNT;
    929	resp->tr_itruncate.tr_logflags |= XFS_TRANS_PERM_LOG_RES;
    930
    931	resp->tr_rename.tr_logres = xfs_calc_rename_reservation(mp);
    932	resp->tr_rename.tr_logcount = XFS_RENAME_LOG_COUNT;
    933	resp->tr_rename.tr_logflags |= XFS_TRANS_PERM_LOG_RES;
    934
    935	resp->tr_link.tr_logres = xfs_calc_link_reservation(mp);
    936	resp->tr_link.tr_logcount = XFS_LINK_LOG_COUNT;
    937	resp->tr_link.tr_logflags |= XFS_TRANS_PERM_LOG_RES;
    938
    939	resp->tr_remove.tr_logres = xfs_calc_remove_reservation(mp);
    940	resp->tr_remove.tr_logcount = XFS_REMOVE_LOG_COUNT;
    941	resp->tr_remove.tr_logflags |= XFS_TRANS_PERM_LOG_RES;
    942
    943	resp->tr_symlink.tr_logres = xfs_calc_symlink_reservation(mp);
    944	resp->tr_symlink.tr_logcount = XFS_SYMLINK_LOG_COUNT;
    945	resp->tr_symlink.tr_logflags |= XFS_TRANS_PERM_LOG_RES;
    946
    947	resp->tr_create.tr_logres = xfs_calc_icreate_reservation(mp);
    948	resp->tr_create.tr_logcount = XFS_CREATE_LOG_COUNT;
    949	resp->tr_create.tr_logflags |= XFS_TRANS_PERM_LOG_RES;
    950
    951	resp->tr_create_tmpfile.tr_logres =
    952			xfs_calc_create_tmpfile_reservation(mp);
    953	resp->tr_create_tmpfile.tr_logcount = XFS_CREATE_TMPFILE_LOG_COUNT;
    954	resp->tr_create_tmpfile.tr_logflags |= XFS_TRANS_PERM_LOG_RES;
    955
    956	resp->tr_mkdir.tr_logres = xfs_calc_mkdir_reservation(mp);
    957	resp->tr_mkdir.tr_logcount = XFS_MKDIR_LOG_COUNT;
    958	resp->tr_mkdir.tr_logflags |= XFS_TRANS_PERM_LOG_RES;
    959
    960	resp->tr_ifree.tr_logres = xfs_calc_ifree_reservation(mp);
    961	resp->tr_ifree.tr_logcount = XFS_INACTIVE_LOG_COUNT;
    962	resp->tr_ifree.tr_logflags |= XFS_TRANS_PERM_LOG_RES;
    963
    964	resp->tr_addafork.tr_logres = xfs_calc_addafork_reservation(mp);
    965	resp->tr_addafork.tr_logcount = XFS_ADDAFORK_LOG_COUNT;
    966	resp->tr_addafork.tr_logflags |= XFS_TRANS_PERM_LOG_RES;
    967
    968	resp->tr_attrinval.tr_logres = xfs_calc_attrinval_reservation(mp);
    969	resp->tr_attrinval.tr_logcount = XFS_ATTRINVAL_LOG_COUNT;
    970	resp->tr_attrinval.tr_logflags |= XFS_TRANS_PERM_LOG_RES;
    971
    972	resp->tr_attrsetm.tr_logres = xfs_calc_attrsetm_reservation(mp);
    973	resp->tr_attrsetm.tr_logcount = XFS_ATTRSET_LOG_COUNT;
    974	resp->tr_attrsetm.tr_logflags |= XFS_TRANS_PERM_LOG_RES;
    975
    976	resp->tr_attrrm.tr_logres = xfs_calc_attrrm_reservation(mp);
    977	resp->tr_attrrm.tr_logcount = XFS_ATTRRM_LOG_COUNT;
    978	resp->tr_attrrm.tr_logflags |= XFS_TRANS_PERM_LOG_RES;
    979
    980	resp->tr_growrtalloc.tr_logres = xfs_calc_growrtalloc_reservation(mp);
    981	resp->tr_growrtalloc.tr_logcount = XFS_DEFAULT_PERM_LOG_COUNT;
    982	resp->tr_growrtalloc.tr_logflags |= XFS_TRANS_PERM_LOG_RES;
    983
    984	resp->tr_qm_dqalloc.tr_logres = xfs_calc_qm_dqalloc_reservation(mp,
    985			false);
    986	resp->tr_qm_dqalloc.tr_logcount = XFS_WRITE_LOG_COUNT;
    987	resp->tr_qm_dqalloc.tr_logflags |= XFS_TRANS_PERM_LOG_RES;
    988
    989	/*
    990	 * The following transactions are logged in logical format with
    991	 * a default log count.
    992	 */
    993	resp->tr_qm_setqlim.tr_logres = xfs_calc_qm_setqlim_reservation();
    994	resp->tr_qm_setqlim.tr_logcount = XFS_DEFAULT_LOG_COUNT;
    995
    996	resp->tr_sb.tr_logres = xfs_calc_sb_reservation(mp);
    997	resp->tr_sb.tr_logcount = XFS_DEFAULT_LOG_COUNT;
    998
    999	/* growdata requires permanent res; it can free space to the last AG */
   1000	resp->tr_growdata.tr_logres = xfs_calc_growdata_reservation(mp);
   1001	resp->tr_growdata.tr_logcount = XFS_DEFAULT_PERM_LOG_COUNT;
   1002	resp->tr_growdata.tr_logflags |= XFS_TRANS_PERM_LOG_RES;
   1003
   1004	/* The following transaction are logged in logical format */
   1005	resp->tr_ichange.tr_logres = xfs_calc_ichange_reservation(mp);
   1006	resp->tr_fsyncts.tr_logres = xfs_calc_swrite_reservation(mp);
   1007	resp->tr_writeid.tr_logres = xfs_calc_writeid_reservation(mp);
   1008	resp->tr_attrsetrt.tr_logres = xfs_calc_attrsetrt_reservation(mp);
   1009	resp->tr_clearagi.tr_logres = xfs_calc_clear_agi_bucket_reservation(mp);
   1010	resp->tr_growrtzero.tr_logres = xfs_calc_growrtzero_reservation(mp);
   1011	resp->tr_growrtfree.tr_logres = xfs_calc_growrtfree_reservation(mp);
   1012
   1013	/*
   1014	 * Add one logcount for BUI items that appear with rmap or reflink,
   1015	 * one logcount for refcount intent items, and one logcount for rmap
   1016	 * intent items.
   1017	 */
   1018	if (xfs_has_reflink(mp) || xfs_has_rmapbt(mp))
   1019		logcount_adj++;
   1020	if (xfs_has_reflink(mp))
   1021		logcount_adj++;
   1022	if (xfs_has_rmapbt(mp))
   1023		logcount_adj++;
   1024
   1025	resp->tr_itruncate.tr_logcount += logcount_adj;
   1026	resp->tr_write.tr_logcount += logcount_adj;
   1027	resp->tr_qm_dqalloc.tr_logcount += logcount_adj;
   1028}