cachepc-linux

Fork of AMDESE/linux with modifications for the CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux

xfs_ag.c


/* SPDX-License-Identifier: GPL-2.0 */
/*
 * Copyright (c) 2000-2005 Silicon Graphics, Inc.
 * Copyright (c) 2018 Red Hat, Inc.
 * All rights reserved.
 */

#include "xfs.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
#include "xfs_trans_resv.h"
#include "xfs_bit.h"
#include "xfs_sb.h"
#include "xfs_mount.h"
#include "xfs_btree.h"
#include "xfs_alloc_btree.h"
#include "xfs_rmap_btree.h"
#include "xfs_alloc.h"
#include "xfs_ialloc.h"
#include "xfs_rmap.h"
#include "xfs_ag.h"
#include "xfs_ag_resv.h"
#include "xfs_health.h"
#include "xfs_error.h"
#include "xfs_bmap.h"
#include "xfs_defer.h"
#include "xfs_log_format.h"
#include "xfs_trans.h"
#include "xfs_trace.h"
#include "xfs_inode.h"
#include "xfs_icache.h"


/*
 * Passive reference counting access wrappers to the perag structures. If the
 * per-ag structure is to be freed, the freeing code is responsible for cleaning
 * up objects with passive references before freeing the structure. This
 * includes things like cached buffers.
 */
struct xfs_perag *
xfs_perag_get(
	struct xfs_mount	*mp,
	xfs_agnumber_t		agno)
{
	struct xfs_perag	*pag;
	int			ref = 0;

	rcu_read_lock();
	pag = radix_tree_lookup(&mp->m_perag_tree, agno);
	if (pag) {
		ASSERT(atomic_read(&pag->pag_ref) >= 0);
		ref = atomic_inc_return(&pag->pag_ref);
	}
	rcu_read_unlock();
	trace_xfs_perag_get(mp, agno, ref, _RET_IP_);
	return pag;
}

/*
 * Search from @first to find the next perag with the given tag set.
 */
struct xfs_perag *
xfs_perag_get_tag(
	struct xfs_mount	*mp,
	xfs_agnumber_t		first,
	unsigned int		tag)
{
	struct xfs_perag	*pag;
	int			found;
	int			ref;

	rcu_read_lock();
	found = radix_tree_gang_lookup_tag(&mp->m_perag_tree,
					(void **)&pag, first, 1, tag);
	if (found <= 0) {
		rcu_read_unlock();
		return NULL;
	}
	ref = atomic_inc_return(&pag->pag_ref);
	rcu_read_unlock();
	trace_xfs_perag_get_tag(mp, pag->pag_agno, ref, _RET_IP_);
	return pag;
}
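
/*
 * Illustrative sketch (not part of the original file): how a caller might
 * walk every perag that has a given tag set, using the lookup/put pairing
 * above. The tag value and the loop body are assumptions for illustration;
 * in-tree callers wrap this pattern in helpers such as the icache walks.
 */
static inline void
example_walk_tagged_perags(
	struct xfs_mount	*mp,
	unsigned int		tag)
{
	struct xfs_perag	*pag;
	xfs_agnumber_t		agno = 0;

	while ((pag = xfs_perag_get_tag(mp, agno, tag)) != NULL) {
		/* resume the gang lookup after this AG on the next pass */
		agno = pag->pag_agno + 1;
		/* ... operate on pag while holding the passive reference ... */
		xfs_perag_put(pag);
	}
}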

void
xfs_perag_put(
	struct xfs_perag	*pag)
{
	int	ref;

	ASSERT(atomic_read(&pag->pag_ref) > 0);
	ref = atomic_dec_return(&pag->pag_ref);
	trace_xfs_perag_put(pag->pag_mount, pag->pag_agno, ref, _RET_IP_);
}
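
/*
 * Illustrative sketch (not part of the original file): the canonical
 * lookup/use/release pairing for the wrappers above. Reading
 * pagf_freeblks in the body is an assumption for illustration only.
 */
static inline xfs_extlen_t
example_read_ag_freeblks(
	struct xfs_mount	*mp,
	xfs_agnumber_t		agno)
{
	struct xfs_perag	*pag;
	xfs_extlen_t		freeblks = 0;

	pag = xfs_perag_get(mp, agno);
	if (pag) {
		/* the passive reference keeps the perag alive while we read */
		freeblks = pag->pagf_freeblks;
		xfs_perag_put(pag);
	}
	return freeblks;
}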

/*
 * xfs_initialize_perag_data
 *
 * Read in each per-ag structure so we can count up the number of
 * allocated inodes, free inodes and used filesystem blocks as this
 * information is no longer persistent in the superblock. Once we have
 * this information, write it into the in-core superblock structure.
 */
int
xfs_initialize_perag_data(
	struct xfs_mount	*mp,
	xfs_agnumber_t		agcount)
{
	xfs_agnumber_t		index;
	struct xfs_perag	*pag;
	struct xfs_sb		*sbp = &mp->m_sb;
	uint64_t		ifree = 0;
	uint64_t		ialloc = 0;
	uint64_t		bfree = 0;
	uint64_t		bfreelst = 0;
	uint64_t		btree = 0;
	uint64_t		fdblocks;
	int			error = 0;

	for (index = 0; index < agcount; index++) {
		/*
		 * read the agf, then the agi. This gets us
		 * all the information we need and populates the
		 * per-ag structures for us.
		 */
		error = xfs_alloc_pagf_init(mp, NULL, index, 0);
		if (error)
			return error;

		error = xfs_ialloc_pagi_init(mp, NULL, index);
		if (error)
			return error;
		pag = xfs_perag_get(mp, index);
		ifree += pag->pagi_freecount;
		ialloc += pag->pagi_count;
		bfree += pag->pagf_freeblks;
		bfreelst += pag->pagf_flcount;
		btree += pag->pagf_btreeblks;
		xfs_perag_put(pag);
	}
	fdblocks = bfree + bfreelst + btree;

	/*
	 * If the new summary counts are obviously incorrect, fail the
	 * mount operation because that implies the AGFs are also corrupt.
	 * Clear FS_COUNTERS so that we don't unmount with a dirty log, which
	 * will prevent xfs_repair from fixing anything.
	 */
	if (fdblocks > sbp->sb_dblocks || ifree > ialloc) {
		xfs_alert(mp, "AGF corruption. Please run xfs_repair.");
		error = -EFSCORRUPTED;
		goto out;
	}

	/* Overwrite incore superblock counters with just-read data */
	spin_lock(&mp->m_sb_lock);
	sbp->sb_ifree = ifree;
	sbp->sb_icount = ialloc;
	sbp->sb_fdblocks = fdblocks;
	spin_unlock(&mp->m_sb_lock);

	xfs_reinit_percpu_counters(mp);
out:
	xfs_fs_mark_healthy(mp, XFS_SICK_FS_COUNTERS);
	return error;
}

STATIC void
__xfs_free_perag(
	struct rcu_head	*head)
{
	struct xfs_perag *pag = container_of(head, struct xfs_perag, rcu_head);

	ASSERT(!delayed_work_pending(&pag->pag_blockgc_work));
	kmem_free(pag);
}

/*
 * Free up the per-ag resources associated with the mount structure.
 */
void
xfs_free_perag(
	struct xfs_mount	*mp)
{
	struct xfs_perag	*pag;
	xfs_agnumber_t		agno;

	for (agno = 0; agno < mp->m_sb.sb_agcount; agno++) {
		spin_lock(&mp->m_perag_lock);
		pag = radix_tree_delete(&mp->m_perag_tree, agno);
		spin_unlock(&mp->m_perag_lock);
		ASSERT(pag);
		XFS_IS_CORRUPT(pag->pag_mount, atomic_read(&pag->pag_ref) != 0);

		cancel_delayed_work_sync(&pag->pag_blockgc_work);
		xfs_iunlink_destroy(pag);
		xfs_buf_hash_destroy(pag);

		call_rcu(&pag->rcu_head, __xfs_free_perag);
	}
}

int
xfs_initialize_perag(
	struct xfs_mount	*mp,
	xfs_agnumber_t		agcount,
	xfs_agnumber_t		*maxagi)
{
	struct xfs_perag	*pag;
	xfs_agnumber_t		index;
	xfs_agnumber_t		first_initialised = NULLAGNUMBER;
	int			error;

	/*
	 * Walk the current per-ag tree so we don't try to initialise AGs
	 * that already exist (growfs case). Allocate and insert all the
	 * AGs we don't find ready for initialisation.
	 */
	for (index = 0; index < agcount; index++) {
		pag = xfs_perag_get(mp, index);
		if (pag) {
			xfs_perag_put(pag);
			continue;
		}

		pag = kmem_zalloc(sizeof(*pag), KM_MAYFAIL);
		if (!pag) {
			error = -ENOMEM;
			goto out_unwind_new_pags;
		}
		pag->pag_agno = index;
		pag->pag_mount = mp;

		error = radix_tree_preload(GFP_NOFS);
		if (error)
			goto out_free_pag;

		spin_lock(&mp->m_perag_lock);
		if (radix_tree_insert(&mp->m_perag_tree, index, pag)) {
			WARN_ON_ONCE(1);
			spin_unlock(&mp->m_perag_lock);
			radix_tree_preload_end();
			error = -EEXIST;
			goto out_free_pag;
		}
		spin_unlock(&mp->m_perag_lock);
		radix_tree_preload_end();

#ifdef __KERNEL__
		/* Place kernel structure only init below this point. */
		spin_lock_init(&pag->pag_ici_lock);
		spin_lock_init(&pag->pagb_lock);
		spin_lock_init(&pag->pag_state_lock);
		INIT_DELAYED_WORK(&pag->pag_blockgc_work, xfs_blockgc_worker);
		INIT_RADIX_TREE(&pag->pag_ici_root, GFP_ATOMIC);
		init_waitqueue_head(&pag->pagb_wait);
		pag->pagb_count = 0;
		pag->pagb_tree = RB_ROOT;
#endif /* __KERNEL__ */

		error = xfs_buf_hash_init(pag);
		if (error)
			goto out_remove_pag;

		error = xfs_iunlink_init(pag);
		if (error)
			goto out_hash_destroy;

		/* first new pag is fully initialized */
		if (first_initialised == NULLAGNUMBER)
			first_initialised = index;
	}

	index = xfs_set_inode_alloc(mp, agcount);

	if (maxagi)
		*maxagi = index;

	mp->m_ag_prealloc_blocks = xfs_prealloc_blocks(mp);
	return 0;

out_hash_destroy:
	xfs_buf_hash_destroy(pag);
out_remove_pag:
	radix_tree_delete(&mp->m_perag_tree, index);
out_free_pag:
	kmem_free(pag);
out_unwind_new_pags:
	/* unwind any prior newly initialized pags */
	for (index = first_initialised; index < agcount; index++) {
		pag = radix_tree_delete(&mp->m_perag_tree, index);
		if (!pag)
			break;
		xfs_buf_hash_destroy(pag);
		xfs_iunlink_destroy(pag);
		kmem_free(pag);
	}
	return error;
}

static int
xfs_get_aghdr_buf(
	struct xfs_mount	*mp,
	xfs_daddr_t		blkno,
	size_t			numblks,
	struct xfs_buf		**bpp,
	const struct xfs_buf_ops *ops)
{
	struct xfs_buf		*bp;
	int			error;

	error = xfs_buf_get_uncached(mp->m_ddev_targp, numblks, 0, &bp);
	if (error)
		return error;

	bp->b_maps[0].bm_bn = blkno;
	bp->b_ops = ops;

	*bpp = bp;
	return 0;
}

static inline bool is_log_ag(struct xfs_mount *mp, struct aghdr_init_data *id)
{
	return mp->m_sb.sb_logstart > 0 &&
	       id->agno == XFS_FSB_TO_AGNO(mp, mp->m_sb.sb_logstart);
}

/*
 * Generic btree root block init function
 */
static void
xfs_btroot_init(
	struct xfs_mount	*mp,
	struct xfs_buf		*bp,
	struct aghdr_init_data	*id)
{
	xfs_btree_init_block(mp, bp, id->type, 0, 0, id->agno);
}

/* Finish initializing a free space btree. */
static void
xfs_freesp_init_recs(
	struct xfs_mount	*mp,
	struct xfs_buf		*bp,
	struct aghdr_init_data	*id)
{
	struct xfs_alloc_rec	*arec;
	struct xfs_btree_block	*block = XFS_BUF_TO_BLOCK(bp);

	arec = XFS_ALLOC_REC_ADDR(mp, XFS_BUF_TO_BLOCK(bp), 1);
	arec->ar_startblock = cpu_to_be32(mp->m_ag_prealloc_blocks);

	if (is_log_ag(mp, id)) {
		struct xfs_alloc_rec	*nrec;
		xfs_agblock_t		start = XFS_FSB_TO_AGBNO(mp,
							mp->m_sb.sb_logstart);

		ASSERT(start >= mp->m_ag_prealloc_blocks);
		if (start != mp->m_ag_prealloc_blocks) {
			/*
			 * Modify the first record to pad out the stripe
			 * alignment of the log.
			 */
			arec->ar_blockcount = cpu_to_be32(start -
						mp->m_ag_prealloc_blocks);
			nrec = arec + 1;

			/*
			 * Insert second record at start of internal log
			 * which then gets trimmed.
			 */
			nrec->ar_startblock = cpu_to_be32(
					be32_to_cpu(arec->ar_startblock) +
					be32_to_cpu(arec->ar_blockcount));
			arec = nrec;
			be16_add_cpu(&block->bb_numrecs, 1);
		}
		/*
		 * Change record start to after the internal log
		 */
		be32_add_cpu(&arec->ar_startblock, mp->m_sb.sb_logblocks);
	}

	/*
	 * Calculate the record block count and check for the case where
	 * the log might have consumed all available space in the AG. If
	 * so, reset the record count to 0 to avoid exposure of an invalid
	 * record start block.
	 */
	arec->ar_blockcount = cpu_to_be32(id->agsize -
					  be32_to_cpu(arec->ar_startblock));
	if (!arec->ar_blockcount)
		block->bb_numrecs = 0;
}
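
/*
 * Worked example (illustrative, all values assumed): with
 * m_ag_prealloc_blocks = 64, an internal log occupying AG blocks
 * [128, 640) and id->agsize = 4096, xfs_freesp_init_recs() leaves two
 * free space records in the root block:
 *
 *	{ ar_startblock =  64, ar_blockcount =   64 }	before the log
 *	{ ar_startblock = 640, ar_blockcount = 3456 }	after the log
 */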

/*
 * Alloc btree root block init functions
 */
static void
xfs_bnoroot_init(
	struct xfs_mount	*mp,
	struct xfs_buf		*bp,
	struct aghdr_init_data	*id)
{
	xfs_btree_init_block(mp, bp, XFS_BTNUM_BNO, 0, 1, id->agno);
	xfs_freesp_init_recs(mp, bp, id);
}

static void
xfs_cntroot_init(
	struct xfs_mount	*mp,
	struct xfs_buf		*bp,
	struct aghdr_init_data	*id)
{
	xfs_btree_init_block(mp, bp, XFS_BTNUM_CNT, 0, 1, id->agno);
	xfs_freesp_init_recs(mp, bp, id);
}

/*
 * Reverse map root block init
 */
static void
xfs_rmaproot_init(
	struct xfs_mount	*mp,
	struct xfs_buf		*bp,
	struct aghdr_init_data	*id)
{
	struct xfs_btree_block	*block = XFS_BUF_TO_BLOCK(bp);
	struct xfs_rmap_rec	*rrec;

	xfs_btree_init_block(mp, bp, XFS_BTNUM_RMAP, 0, 4, id->agno);

	/*
	 * Mark the AG header regions as static metadata. The BNO
	 * btree block is the first block after the headers, so its
	 * location defines the size of the region the static
	 * metadata consumes.
	 *
	 * Note: unlike mkfs, we never have to account for log
	 * space when growing the data regions.
	 */
	rrec = XFS_RMAP_REC_ADDR(block, 1);
	rrec->rm_startblock = 0;
	rrec->rm_blockcount = cpu_to_be32(XFS_BNO_BLOCK(mp));
	rrec->rm_owner = cpu_to_be64(XFS_RMAP_OWN_FS);
	rrec->rm_offset = 0;

	/* account freespace btree root blocks */
	rrec = XFS_RMAP_REC_ADDR(block, 2);
	rrec->rm_startblock = cpu_to_be32(XFS_BNO_BLOCK(mp));
	rrec->rm_blockcount = cpu_to_be32(2);
	rrec->rm_owner = cpu_to_be64(XFS_RMAP_OWN_AG);
	rrec->rm_offset = 0;

	/* account inode btree root blocks */
	rrec = XFS_RMAP_REC_ADDR(block, 3);
	rrec->rm_startblock = cpu_to_be32(XFS_IBT_BLOCK(mp));
	rrec->rm_blockcount = cpu_to_be32(XFS_RMAP_BLOCK(mp) -
					  XFS_IBT_BLOCK(mp));
	rrec->rm_owner = cpu_to_be64(XFS_RMAP_OWN_INOBT);
	rrec->rm_offset = 0;

	/* account for rmap btree root */
	rrec = XFS_RMAP_REC_ADDR(block, 4);
	rrec->rm_startblock = cpu_to_be32(XFS_RMAP_BLOCK(mp));
	rrec->rm_blockcount = cpu_to_be32(1);
	rrec->rm_owner = cpu_to_be64(XFS_RMAP_OWN_AG);
	rrec->rm_offset = 0;

	/* account for refc btree root */
	if (xfs_has_reflink(mp)) {
		rrec = XFS_RMAP_REC_ADDR(block, 5);
		rrec->rm_startblock = cpu_to_be32(xfs_refc_block(mp));
		rrec->rm_blockcount = cpu_to_be32(1);
		rrec->rm_owner = cpu_to_be64(XFS_RMAP_OWN_REFC);
		rrec->rm_offset = 0;
		be16_add_cpu(&block->bb_numrecs, 1);
	}

	/* account for the log space */
	if (is_log_ag(mp, id)) {
		rrec = XFS_RMAP_REC_ADDR(block,
				be16_to_cpu(block->bb_numrecs) + 1);
		rrec->rm_startblock = cpu_to_be32(
				XFS_FSB_TO_AGBNO(mp, mp->m_sb.sb_logstart));
		rrec->rm_blockcount = cpu_to_be32(mp->m_sb.sb_logblocks);
		rrec->rm_owner = cpu_to_be64(XFS_RMAP_OWN_LOG);
		rrec->rm_offset = 0;
		be16_add_cpu(&block->bb_numrecs, 1);
	}
}
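
/*
 * Summary of the static records above (illustrative): for a filesystem
 * without reflink whose log does not live in this AG, the new rmapbt root
 * ends up with four records covering the AG header region, the bno/cnt
 * btree roots, the inode btree root(s), and the rmapbt root itself;
 * reflink and an internal log each append one more record.
 */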

/*
 * Initialise new secondary superblocks with the pre-grow geometry, but mark
 * them as "in progress" so we know they haven't yet been activated. This will
 * get cleared when the update with the new geometry information is done after
 * changes to the primary are committed. This isn't strictly necessary, but we
 * get it for free with the delayed buffer write lists and it means we can tell
 * if a grow operation didn't complete properly after the fact.
 */
static void
xfs_sbblock_init(
	struct xfs_mount	*mp,
	struct xfs_buf		*bp,
	struct aghdr_init_data	*id)
{
	struct xfs_dsb		*dsb = bp->b_addr;

	xfs_sb_to_disk(dsb, &mp->m_sb);
	dsb->sb_inprogress = 1;
}

static void
xfs_agfblock_init(
	struct xfs_mount	*mp,
	struct xfs_buf		*bp,
	struct aghdr_init_data	*id)
{
	struct xfs_agf		*agf = bp->b_addr;
	xfs_extlen_t		tmpsize;

	agf->agf_magicnum = cpu_to_be32(XFS_AGF_MAGIC);
	agf->agf_versionnum = cpu_to_be32(XFS_AGF_VERSION);
	agf->agf_seqno = cpu_to_be32(id->agno);
	agf->agf_length = cpu_to_be32(id->agsize);
	agf->agf_roots[XFS_BTNUM_BNOi] = cpu_to_be32(XFS_BNO_BLOCK(mp));
	agf->agf_roots[XFS_BTNUM_CNTi] = cpu_to_be32(XFS_CNT_BLOCK(mp));
	agf->agf_levels[XFS_BTNUM_BNOi] = cpu_to_be32(1);
	agf->agf_levels[XFS_BTNUM_CNTi] = cpu_to_be32(1);
	if (xfs_has_rmapbt(mp)) {
		agf->agf_roots[XFS_BTNUM_RMAPi] =
					cpu_to_be32(XFS_RMAP_BLOCK(mp));
		agf->agf_levels[XFS_BTNUM_RMAPi] = cpu_to_be32(1);
		agf->agf_rmap_blocks = cpu_to_be32(1);
	}

	agf->agf_flfirst = cpu_to_be32(1);
	agf->agf_fllast = 0;
	agf->agf_flcount = 0;
	tmpsize = id->agsize - mp->m_ag_prealloc_blocks;
	agf->agf_freeblks = cpu_to_be32(tmpsize);
	agf->agf_longest = cpu_to_be32(tmpsize);
	if (xfs_has_crc(mp))
		uuid_copy(&agf->agf_uuid, &mp->m_sb.sb_meta_uuid);
	if (xfs_has_reflink(mp)) {
		agf->agf_refcount_root = cpu_to_be32(
				xfs_refc_block(mp));
		agf->agf_refcount_level = cpu_to_be32(1);
		agf->agf_refcount_blocks = cpu_to_be32(1);
	}

	if (is_log_ag(mp, id)) {
		int64_t	logblocks = mp->m_sb.sb_logblocks;

		be32_add_cpu(&agf->agf_freeblks, -logblocks);
		agf->agf_longest = cpu_to_be32(id->agsize -
			XFS_FSB_TO_AGBNO(mp, mp->m_sb.sb_logstart) - logblocks);
	}
}

static void
xfs_agflblock_init(
	struct xfs_mount	*mp,
	struct xfs_buf		*bp,
	struct aghdr_init_data	*id)
{
	struct xfs_agfl		*agfl = XFS_BUF_TO_AGFL(bp);
	__be32			*agfl_bno;
	int			bucket;

	if (xfs_has_crc(mp)) {
		agfl->agfl_magicnum = cpu_to_be32(XFS_AGFL_MAGIC);
		agfl->agfl_seqno = cpu_to_be32(id->agno);
		uuid_copy(&agfl->agfl_uuid, &mp->m_sb.sb_meta_uuid);
	}

	agfl_bno = xfs_buf_to_agfl_bno(bp);
	for (bucket = 0; bucket < xfs_agfl_size(mp); bucket++)
		agfl_bno[bucket] = cpu_to_be32(NULLAGBLOCK);
}

static void
xfs_agiblock_init(
	struct xfs_mount	*mp,
	struct xfs_buf		*bp,
	struct aghdr_init_data	*id)
{
	struct xfs_agi		*agi = bp->b_addr;
	int			bucket;

	agi->agi_magicnum = cpu_to_be32(XFS_AGI_MAGIC);
	agi->agi_versionnum = cpu_to_be32(XFS_AGI_VERSION);
	agi->agi_seqno = cpu_to_be32(id->agno);
	agi->agi_length = cpu_to_be32(id->agsize);
	agi->agi_count = 0;
	agi->agi_root = cpu_to_be32(XFS_IBT_BLOCK(mp));
	agi->agi_level = cpu_to_be32(1);
	agi->agi_freecount = 0;
	agi->agi_newino = cpu_to_be32(NULLAGINO);
	agi->agi_dirino = cpu_to_be32(NULLAGINO);
	if (xfs_has_crc(mp))
		uuid_copy(&agi->agi_uuid, &mp->m_sb.sb_meta_uuid);
	if (xfs_has_finobt(mp)) {
		agi->agi_free_root = cpu_to_be32(XFS_FIBT_BLOCK(mp));
		agi->agi_free_level = cpu_to_be32(1);
	}
	for (bucket = 0; bucket < XFS_AGI_UNLINKED_BUCKETS; bucket++)
		agi->agi_unlinked[bucket] = cpu_to_be32(NULLAGINO);
	if (xfs_has_inobtcounts(mp)) {
		agi->agi_iblocks = cpu_to_be32(1);
		if (xfs_has_finobt(mp))
			agi->agi_fblocks = cpu_to_be32(1);
	}
}

typedef void (*aghdr_init_work_f)(struct xfs_mount *mp, struct xfs_buf *bp,
				  struct aghdr_init_data *id);
static int
xfs_ag_init_hdr(
	struct xfs_mount	*mp,
	struct aghdr_init_data	*id,
	aghdr_init_work_f	work,
	const struct xfs_buf_ops *ops)
{
	struct xfs_buf		*bp;
	int			error;

	error = xfs_get_aghdr_buf(mp, id->daddr, id->numblks, &bp, ops);
	if (error)
		return error;

	(*work)(mp, bp, id);

	xfs_buf_delwri_queue(bp, &id->buffer_list);
	xfs_buf_relse(bp);
	return 0;
}

struct xfs_aghdr_grow_data {
	xfs_daddr_t		daddr;
	size_t			numblks;
	const struct xfs_buf_ops *ops;
	aghdr_init_work_f	work;
	xfs_btnum_t		type;
	bool			need_init;
};

/*
 * Prepare new AG headers to be written to disk. We use uncached buffers here,
 * as it is assumed these new AG headers are beyond the currently valid
 * filesystem address space. Using cached buffers would trip over EOFS
 * corruption detection algorithms in the buffer cache lookup routines.
 *
 * This is a non-transactional function, but the prepared buffers are added to
 * a delayed write buffer list supplied by the caller so they can submit them
 * to disk and wait on them as required.
 */
int
xfs_ag_init_headers(
	struct xfs_mount	*mp,
	struct aghdr_init_data	*id)
{
	struct xfs_aghdr_grow_data aghdr_data[] = {
	{ /* SB */
		.daddr = XFS_AG_DADDR(mp, id->agno, XFS_SB_DADDR),
		.numblks = XFS_FSS_TO_BB(mp, 1),
		.ops = &xfs_sb_buf_ops,
		.work = &xfs_sbblock_init,
		.need_init = true
	},
	{ /* AGF */
		.daddr = XFS_AG_DADDR(mp, id->agno, XFS_AGF_DADDR(mp)),
		.numblks = XFS_FSS_TO_BB(mp, 1),
		.ops = &xfs_agf_buf_ops,
		.work = &xfs_agfblock_init,
		.need_init = true
	},
	{ /* AGFL */
		.daddr = XFS_AG_DADDR(mp, id->agno, XFS_AGFL_DADDR(mp)),
		.numblks = XFS_FSS_TO_BB(mp, 1),
		.ops = &xfs_agfl_buf_ops,
		.work = &xfs_agflblock_init,
		.need_init = true
	},
	{ /* AGI */
		.daddr = XFS_AG_DADDR(mp, id->agno, XFS_AGI_DADDR(mp)),
		.numblks = XFS_FSS_TO_BB(mp, 1),
		.ops = &xfs_agi_buf_ops,
		.work = &xfs_agiblock_init,
		.need_init = true
	},
	{ /* BNO root block */
		.daddr = XFS_AGB_TO_DADDR(mp, id->agno, XFS_BNO_BLOCK(mp)),
		.numblks = BTOBB(mp->m_sb.sb_blocksize),
		.ops = &xfs_bnobt_buf_ops,
		.work = &xfs_bnoroot_init,
		.need_init = true
	},
	{ /* CNT root block */
		.daddr = XFS_AGB_TO_DADDR(mp, id->agno, XFS_CNT_BLOCK(mp)),
		.numblks = BTOBB(mp->m_sb.sb_blocksize),
		.ops = &xfs_cntbt_buf_ops,
		.work = &xfs_cntroot_init,
		.need_init = true
	},
	{ /* INO root block */
		.daddr = XFS_AGB_TO_DADDR(mp, id->agno, XFS_IBT_BLOCK(mp)),
		.numblks = BTOBB(mp->m_sb.sb_blocksize),
		.ops = &xfs_inobt_buf_ops,
		.work = &xfs_btroot_init,
		.type = XFS_BTNUM_INO,
		.need_init = true
	},
	{ /* FINO root block */
		.daddr = XFS_AGB_TO_DADDR(mp, id->agno, XFS_FIBT_BLOCK(mp)),
		.numblks = BTOBB(mp->m_sb.sb_blocksize),
		.ops = &xfs_finobt_buf_ops,
		.work = &xfs_btroot_init,
		.type = XFS_BTNUM_FINO,
		.need_init = xfs_has_finobt(mp)
	},
	{ /* RMAP root block */
		.daddr = XFS_AGB_TO_DADDR(mp, id->agno, XFS_RMAP_BLOCK(mp)),
		.numblks = BTOBB(mp->m_sb.sb_blocksize),
		.ops = &xfs_rmapbt_buf_ops,
		.work = &xfs_rmaproot_init,
		.need_init = xfs_has_rmapbt(mp)
	},
	{ /* REFC root block */
		.daddr = XFS_AGB_TO_DADDR(mp, id->agno, xfs_refc_block(mp)),
		.numblks = BTOBB(mp->m_sb.sb_blocksize),
		.ops = &xfs_refcountbt_buf_ops,
		.work = &xfs_btroot_init,
		.type = XFS_BTNUM_REFC,
		.need_init = xfs_has_reflink(mp)
	},
	{ /* NULL terminating block */
		.daddr = XFS_BUF_DADDR_NULL,
	}
	};
	struct xfs_aghdr_grow_data *dp;
	int			error = 0;

	/* Account for AG free space in new AG */
	id->nfree += id->agsize - mp->m_ag_prealloc_blocks;
	for (dp = &aghdr_data[0]; dp->daddr != XFS_BUF_DADDR_NULL; dp++) {
		if (!dp->need_init)
			continue;

		id->daddr = dp->daddr;
		id->numblks = dp->numblks;
		id->type = dp->type;
		error = xfs_ag_init_hdr(mp, id, dp->work, dp->ops);
		if (error)
			break;
	}
	return error;
}
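
/*
 * Illustrative sketch (not part of the original file): how a grow-style
 * caller might drive xfs_ag_init_headers(), loosely modelled on
 * xfs_growfs_data_private(). The geometry values and error handling are
 * assumptions for illustration only.
 */
static int
example_init_new_ag(
	struct xfs_mount	*mp,
	xfs_agnumber_t		agno,
	xfs_agblock_t		agsize)
{
	struct aghdr_init_data	id = {};
	int			error;

	INIT_LIST_HEAD(&id.buffer_list);
	id.agno = agno;
	id.agsize = agsize;

	/* stage all the new AG headers on the delwri list... */
	error = xfs_ag_init_headers(mp, &id);
	if (error) {
		xfs_buf_delwri_cancel(&id.buffer_list);
		return error;
	}

	/* ...then write them out to disk in one go */
	return xfs_buf_delwri_submit(&id.buffer_list);
}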

int
xfs_ag_shrink_space(
	struct xfs_mount	*mp,
	struct xfs_trans	**tpp,
	xfs_agnumber_t		agno,
	xfs_extlen_t		delta)
{
	struct xfs_alloc_arg	args = {
		.tp	= *tpp,
		.mp	= mp,
		.type	= XFS_ALLOCTYPE_THIS_BNO,
		.minlen = delta,
		.maxlen = delta,
		.oinfo	= XFS_RMAP_OINFO_SKIP_UPDATE,
		.resv	= XFS_AG_RESV_NONE,
		.prod	= 1
	};
	struct xfs_buf		*agibp, *agfbp;
	struct xfs_agi		*agi;
	struct xfs_agf		*agf;
	xfs_agblock_t		aglen;
	int			error, err2;

	ASSERT(agno == mp->m_sb.sb_agcount - 1);
	error = xfs_ialloc_read_agi(mp, *tpp, agno, &agibp);
	if (error)
		return error;

	agi = agibp->b_addr;

	error = xfs_alloc_read_agf(mp, *tpp, agno, 0, &agfbp);
	if (error)
		return error;

	agf = agfbp->b_addr;
	aglen = be32_to_cpu(agi->agi_length);
	/* some extra paranoid checks before we shrink the ag */
	if (XFS_IS_CORRUPT(mp, agf->agf_length != agi->agi_length))
		return -EFSCORRUPTED;
	if (delta >= aglen)
		return -EINVAL;

	args.fsbno = XFS_AGB_TO_FSB(mp, agno, aglen - delta);

	/*
	 * Make sure that the last inode cluster cannot overlap with the new
	 * end of the AG, even if it's sparse.
	 */
	error = xfs_ialloc_check_shrink(*tpp, agno, agibp, aglen - delta);
	if (error)
		return error;

	/*
	 * Disable perag reservations so they don't cause the allocation
	 * request to fail. We'll re-establish the reservation before we
	 * return.
	 */
	error = xfs_ag_resv_free(agibp->b_pag);
	if (error)
		return error;

	/* internal log shouldn't also show up in the free space btrees */
	error = xfs_alloc_vextent(&args);
	if (!error && args.agbno == NULLAGBLOCK)
		error = -ENOSPC;

	if (error) {
		/*
		 * If extent allocation fails, we need to roll the
		 * transaction to ensure that the AGFL fixup has been
		 * committed anyway.
		 */
		xfs_trans_bhold(*tpp, agfbp);
		err2 = xfs_trans_roll(tpp);
		if (err2)
			return err2;
		xfs_trans_bjoin(*tpp, agfbp);
		goto resv_init_out;
	}

	/*
	 * If the extent was successfully removed from the free space
	 * btrees, we need to confirm that the per-AG reservation still
	 * works as expected.
	 */
	be32_add_cpu(&agi->agi_length, -delta);
	be32_add_cpu(&agf->agf_length, -delta);

	err2 = xfs_ag_resv_init(agibp->b_pag, *tpp);
	if (err2) {
		be32_add_cpu(&agi->agi_length, delta);
		be32_add_cpu(&agf->agf_length, delta);
		if (err2 != -ENOSPC)
			goto resv_err;

		__xfs_free_extent_later(*tpp, args.fsbno, delta, NULL, true);

		/*
		 * Roll the transaction before trying to re-init the per-ag
		 * reservation. The new transaction is clean so it will cancel
		 * without any side effects.
		 */
		error = xfs_defer_finish(tpp);
		if (error)
			return error;

		error = -ENOSPC;
		goto resv_init_out;
	}
	xfs_ialloc_log_agi(*tpp, agibp, XFS_AGI_LENGTH);
	xfs_alloc_log_agf(*tpp, agfbp, XFS_AGF_LENGTH);
	return 0;
resv_init_out:
	err2 = xfs_ag_resv_init(agibp->b_pag, *tpp);
	if (!err2)
		return error;
resv_err:
	xfs_warn(mp, "Error %d reserving per-AG metadata reserve pool.", err2);
	xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
	return err2;
}

/*
 * Extend the AG indicated by @id by the length passed in.
 */
int
xfs_ag_extend_space(
	struct xfs_mount	*mp,
	struct xfs_trans	*tp,
	struct aghdr_init_data	*id,
	xfs_extlen_t		len)
{
	struct xfs_buf		*bp;
	struct xfs_agi		*agi;
	struct xfs_agf		*agf;
	int			error;

	/*
	 * Change the agi length.
	 */
	error = xfs_ialloc_read_agi(mp, tp, id->agno, &bp);
	if (error)
		return error;

	agi = bp->b_addr;
	be32_add_cpu(&agi->agi_length, len);
	ASSERT(id->agno == mp->m_sb.sb_agcount - 1 ||
	       be32_to_cpu(agi->agi_length) == mp->m_sb.sb_agblocks);
	xfs_ialloc_log_agi(tp, bp, XFS_AGI_LENGTH);

	/*
	 * Change agf length.
	 */
	error = xfs_alloc_read_agf(mp, tp, id->agno, 0, &bp);
	if (error)
		return error;

	agf = bp->b_addr;
	be32_add_cpu(&agf->agf_length, len);
	ASSERT(agf->agf_length == agi->agi_length);
	xfs_alloc_log_agf(tp, bp, XFS_AGF_LENGTH);

	/*
	 * Free the new space.
	 *
	 * XFS_RMAP_OINFO_SKIP_UPDATE is used here to tell the rmap btree that
	 * this doesn't actually exist in the rmap btree.
	 */
	error = xfs_rmap_free(tp, bp, bp->b_pag,
				be32_to_cpu(agf->agf_length) - len,
				len, &XFS_RMAP_OINFO_SKIP_UPDATE);
	if (error)
		return error;

	return xfs_free_extent(tp, XFS_AGB_TO_FSB(mp, id->agno,
					be32_to_cpu(agf->agf_length) - len),
				len, &XFS_RMAP_OINFO_SKIP_UPDATE,
				XFS_AG_RESV_NONE);
}

/* Retrieve AG geometry. */
int
xfs_ag_get_geometry(
	struct xfs_mount	*mp,
	xfs_agnumber_t		agno,
	struct xfs_ag_geometry	*ageo)
{
	struct xfs_buf		*agi_bp;
	struct xfs_buf		*agf_bp;
	struct xfs_agi		*agi;
	struct xfs_agf		*agf;
	struct xfs_perag	*pag;
	unsigned int		freeblks;
	int			error;

	if (agno >= mp->m_sb.sb_agcount)
		return -EINVAL;

	/* Lock the AG headers. */
	error = xfs_ialloc_read_agi(mp, NULL, agno, &agi_bp);
	if (error)
		return error;
	error = xfs_alloc_read_agf(mp, NULL, agno, 0, &agf_bp);
	if (error)
		goto out_agi;

	pag = agi_bp->b_pag;

	/* Fill out form. */
	memset(ageo, 0, sizeof(*ageo));
	ageo->ag_number = agno;

	agi = agi_bp->b_addr;
	ageo->ag_icount = be32_to_cpu(agi->agi_count);
	ageo->ag_ifree = be32_to_cpu(agi->agi_freecount);

	agf = agf_bp->b_addr;
	ageo->ag_length = be32_to_cpu(agf->agf_length);
	freeblks = pag->pagf_freeblks +
		   pag->pagf_flcount +
		   pag->pagf_btreeblks -
		   xfs_ag_resv_needed(pag, XFS_AG_RESV_NONE);
	ageo->ag_freeblks = freeblks;
	xfs_ag_geom_health(pag, ageo);

	/* Release resources. */
	xfs_buf_relse(agf_bp);
out_agi:
	xfs_buf_relse(agi_bp);
	return error;
}
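
/*
 * Illustrative sketch (not part of the original file): an ioctl-style
 * caller for xfs_ag_get_geometry(), loosely modelled on the
 * XFS_IOC_AG_GEOMETRY handler. The user pointer handling and the omission
 * of flag/reserved-field validation are assumptions for illustration.
 */
static int
example_query_ag_geometry(
	struct xfs_mount	*mp,
	void __user		*arg)
{
	struct xfs_ag_geometry	ageo;
	int			error;

	if (copy_from_user(&ageo, arg, sizeof(ageo)))
		return -EFAULT;

	/* ag_number selects the AG; the rest of the struct is filled in */
	error = xfs_ag_get_geometry(mp, ageo.ag_number, &ageo);
	if (error)
		return error;

	if (copy_to_user(arg, &ageo, sizeof(ageo)))
		return -EFAULT;
	return 0;
}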