cachepc-linux

Fork of AMDESE/linux with modifications for CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux

common.c (22134B)


// SPDX-License-Identifier: GPL-2.0+
/*
 * Copyright (C) 2017 Oracle.  All Rights Reserved.
 * Author: Darrick J. Wong <darrick.wong@oracle.com>
 */
#include "xfs.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
#include "xfs_trans_resv.h"
#include "xfs_mount.h"
#include "xfs_btree.h"
#include "xfs_log_format.h"
#include "xfs_trans.h"
#include "xfs_inode.h"
#include "xfs_icache.h"
#include "xfs_alloc.h"
#include "xfs_alloc_btree.h"
#include "xfs_ialloc.h"
#include "xfs_ialloc_btree.h"
#include "xfs_refcount_btree.h"
#include "xfs_rmap.h"
#include "xfs_rmap_btree.h"
#include "xfs_log.h"
#include "xfs_trans_priv.h"
#include "xfs_da_format.h"
#include "xfs_da_btree.h"
#include "xfs_attr.h"
#include "xfs_reflink.h"
#include "xfs_ag.h"
#include "scrub/scrub.h"
#include "scrub/common.h"
#include "scrub/trace.h"
#include "scrub/repair.h"
#include "scrub/health.h"

/* Common code for the metadata scrubbers. */

/*
 * Handling operational errors.
 *
 * The *_process_error() family of functions are used to process error return
 * codes from functions called as part of a scrub operation.
 *
 * If there's no error, we return true to tell the caller that it's ok
 * to move on to the next check in its list.
 *
 * For non-verifier errors (e.g. ENOMEM) we return false to tell the
 * caller that something bad happened, and we preserve *error so that
 * the caller can return the *error up the stack to userspace.
 *
 * Verifier errors (EFSBADCRC/EFSCORRUPTED) are recorded by setting
 * OFLAG_CORRUPT in sm_flags and the *error is cleared.  In other words,
 * we track verifier errors (and failed scrub checks) via OFLAG_CORRUPT,
 * not via return codes.  We return false to tell the caller that
 * something bad happened.  Since the error has been cleared, the caller
 * will (presumably) return that zero and scrubbing will move on to
 * whatever's next.
 *
 * ftrace can be used to record the precise metadata location and the
 * approximate code location of the failed operation.
 */

/* Check for operational errors. */
static bool
__xchk_process_error(
	struct xfs_scrub	*sc,
	xfs_agnumber_t		agno,
	xfs_agblock_t		bno,
	int			*error,
	__u32			errflag,
	void			*ret_ip)
{
	switch (*error) {
	case 0:
		return true;
	case -EDEADLOCK:
		/* Used to restart an op with deadlock avoidance. */
		trace_xchk_deadlock_retry(
				sc->ip ? sc->ip : XFS_I(file_inode(sc->file)),
				sc->sm, *error);
		break;
	case -EFSBADCRC:
	case -EFSCORRUPTED:
		/* Note the badness but don't abort. */
		sc->sm->sm_flags |= errflag;
		*error = 0;
		fallthrough;
	default:
		trace_xchk_op_error(sc, agno, bno, *error,
				ret_ip);
		break;
	}
	return false;
}

bool
xchk_process_error(
	struct xfs_scrub	*sc,
	xfs_agnumber_t		agno,
	xfs_agblock_t		bno,
	int			*error)
{
	return __xchk_process_error(sc, agno, bno, error,
			XFS_SCRUB_OFLAG_CORRUPT, __return_address);
}

bool
xchk_xref_process_error(
	struct xfs_scrub	*sc,
	xfs_agnumber_t		agno,
	xfs_agblock_t		bno,
	int			*error)
{
	return __xchk_process_error(sc, agno, bno, error,
			XFS_SCRUB_OFLAG_XFAIL, __return_address);
}

/* Check for operational errors for a file offset. */
static bool
__xchk_fblock_process_error(
	struct xfs_scrub	*sc,
	int			whichfork,
	xfs_fileoff_t		offset,
	int			*error,
	__u32			errflag,
	void			*ret_ip)
{
	switch (*error) {
	case 0:
		return true;
	case -EDEADLOCK:
		/* Used to restart an op with deadlock avoidance. */
		trace_xchk_deadlock_retry(sc->ip, sc->sm, *error);
		break;
	case -EFSBADCRC:
	case -EFSCORRUPTED:
		/* Note the badness but don't abort. */
		sc->sm->sm_flags |= errflag;
		*error = 0;
		fallthrough;
	default:
		trace_xchk_file_op_error(sc, whichfork, offset, *error,
				ret_ip);
		break;
	}
	return false;
}

bool
xchk_fblock_process_error(
	struct xfs_scrub	*sc,
	int			whichfork,
	xfs_fileoff_t		offset,
	int			*error)
{
	return __xchk_fblock_process_error(sc, whichfork, offset, error,
			XFS_SCRUB_OFLAG_CORRUPT, __return_address);
}

bool
xchk_fblock_xref_process_error(
	struct xfs_scrub	*sc,
	int			whichfork,
	xfs_fileoff_t		offset,
	int			*error)
{
	return __xchk_fblock_process_error(sc, whichfork, offset, error,
			XFS_SCRUB_OFLAG_XFAIL, __return_address);
}
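
/*
 * Illustrative sketch (not part of the original file): the calling
 * convention the comment block above describes.  The helper
 * xchk_example_read_block() is a hypothetical stand-in for any
 * metadata read performed during a scrub.
 */
#if 0
STATIC int
xchk_example_check(
	struct xfs_scrub	*sc,
	xfs_agnumber_t		agno,
	xfs_agblock_t		bno)
{
	int			error;

	error = xchk_example_read_block(sc, agno, bno);
	if (!xchk_process_error(sc, agno, bno, &error))
		return error;	/* 0 after a verifier error (corruption is
				 * already recorded in sm_flags); nonzero
				 * for operational errors like -ENOMEM */

	/* No error and nothing flagged: move on to the next check. */
	return 0;
}
#endif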

/*
 * Handling scrub corruption/optimization/warning checks.
 *
 * The *_set_{corrupt,preen,warning}() family of functions are used to
 * record the presence of metadata that is incorrect (corrupt), could be
 * optimized somehow (preen), or should be flagged for administrative
 * review but is not incorrect (warn).
 *
 * ftrace can be used to record the precise metadata location and
 * approximate code location of the failed check.
 */

/* Record a block which could be optimized. */
void
xchk_block_set_preen(
	struct xfs_scrub	*sc,
	struct xfs_buf		*bp)
{
	sc->sm->sm_flags |= XFS_SCRUB_OFLAG_PREEN;
	trace_xchk_block_preen(sc, xfs_buf_daddr(bp), __return_address);
}

/* Record an inode which could be optimized. */
void
xchk_ino_set_preen(
	struct xfs_scrub	*sc,
	xfs_ino_t		ino)
{
	sc->sm->sm_flags |= XFS_SCRUB_OFLAG_PREEN;
	trace_xchk_ino_preen(sc, ino, __return_address);
}

/* Record something being wrong with the filesystem primary superblock. */
void
xchk_set_corrupt(
	struct xfs_scrub	*sc)
{
	sc->sm->sm_flags |= XFS_SCRUB_OFLAG_CORRUPT;
	trace_xchk_fs_error(sc, 0, __return_address);
}

/* Record a corrupt block. */
void
xchk_block_set_corrupt(
	struct xfs_scrub	*sc,
	struct xfs_buf		*bp)
{
	sc->sm->sm_flags |= XFS_SCRUB_OFLAG_CORRUPT;
	trace_xchk_block_error(sc, xfs_buf_daddr(bp), __return_address);
}

/* Record a corruption while cross-referencing. */
void
xchk_block_xref_set_corrupt(
	struct xfs_scrub	*sc,
	struct xfs_buf		*bp)
{
	sc->sm->sm_flags |= XFS_SCRUB_OFLAG_XCORRUPT;
	trace_xchk_block_error(sc, xfs_buf_daddr(bp), __return_address);
}

/* Record a corrupt inode. */
void
xchk_ino_set_corrupt(
	struct xfs_scrub	*sc,
	xfs_ino_t		ino)
{
	sc->sm->sm_flags |= XFS_SCRUB_OFLAG_CORRUPT;
	trace_xchk_ino_error(sc, ino, __return_address);
}

/* Record a corruption while cross-referencing with an inode. */
void
xchk_ino_xref_set_corrupt(
	struct xfs_scrub	*sc,
	xfs_ino_t		ino)
{
	sc->sm->sm_flags |= XFS_SCRUB_OFLAG_XCORRUPT;
	trace_xchk_ino_error(sc, ino, __return_address);
}

/* Record corruption in a block indexed by a file fork. */
void
xchk_fblock_set_corrupt(
	struct xfs_scrub	*sc,
	int			whichfork,
	xfs_fileoff_t		offset)
{
	sc->sm->sm_flags |= XFS_SCRUB_OFLAG_CORRUPT;
	trace_xchk_fblock_error(sc, whichfork, offset, __return_address);
}

/* Record a corruption while cross-referencing a fork block. */
void
xchk_fblock_xref_set_corrupt(
	struct xfs_scrub	*sc,
	int			whichfork,
	xfs_fileoff_t		offset)
{
	sc->sm->sm_flags |= XFS_SCRUB_OFLAG_XCORRUPT;
	trace_xchk_fblock_error(sc, whichfork, offset, __return_address);
}

/*
 * Warn about inodes that need administrative review but are not
 * incorrect.
 */
void
xchk_ino_set_warning(
	struct xfs_scrub	*sc,
	xfs_ino_t		ino)
{
	sc->sm->sm_flags |= XFS_SCRUB_OFLAG_WARNING;
	trace_xchk_ino_warning(sc, ino, __return_address);
}

/* Warn about a block indexed by a file fork that needs review. */
void
xchk_fblock_set_warning(
	struct xfs_scrub	*sc,
	int			whichfork,
	xfs_fileoff_t		offset)
{
	sc->sm->sm_flags |= XFS_SCRUB_OFLAG_WARNING;
	trace_xchk_fblock_warning(sc, whichfork, offset, __return_address);
}

/* Signal an incomplete scrub. */
void
xchk_set_incomplete(
	struct xfs_scrub	*sc)
{
	sc->sm->sm_flags |= XFS_SCRUB_OFLAG_INCOMPLETE;
	trace_xchk_incomplete(sc, __return_address);
}
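
/*
 * Illustrative sketch (not part of the original file): how a scrubber
 * typically picks among the recording helpers above.  The three
 * predicates are hypothetical.
 */
#if 0
STATIC void
xchk_example_classify(
	struct xfs_scrub	*sc,
	struct xfs_buf		*bp,
	xfs_ino_t		ino)
{
	if (xchk_example_block_is_wrong(bp))
		xchk_block_set_corrupt(sc, bp);	/* incorrect metadata */
	else if (xchk_example_block_is_suboptimal(bp))
		xchk_block_set_preen(sc, bp);	/* legal, could be better */
	else if (xchk_example_ino_is_odd(ino))
		xchk_ino_set_warning(sc, ino);	/* legal, needs review */
}
#endif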

/*
 * rmap scrubbing -- compute the number of blocks with a given owner,
 * at least according to the reverse mapping data.
 */

struct xchk_rmap_ownedby_info {
	const struct xfs_owner_info	*oinfo;
	xfs_filblks_t			*blocks;
};

STATIC int
xchk_count_rmap_ownedby_irec(
	struct xfs_btree_cur		*cur,
	const struct xfs_rmap_irec	*rec,
	void				*priv)
{
	struct xchk_rmap_ownedby_info	*sroi = priv;
	bool				irec_attr;
	bool				oinfo_attr;

	irec_attr = rec->rm_flags & XFS_RMAP_ATTR_FORK;
	oinfo_attr = sroi->oinfo->oi_flags & XFS_OWNER_INFO_ATTR_FORK;

	if (rec->rm_owner != sroi->oinfo->oi_owner)
		return 0;

	if (XFS_RMAP_NON_INODE_OWNER(rec->rm_owner) || irec_attr == oinfo_attr)
		(*sroi->blocks) += rec->rm_blockcount;

	return 0;
}

/*
 * Calculate the number of blocks the rmap thinks are owned by something.
 * The caller should pass us an rmapbt cursor.
 */
int
xchk_count_rmap_ownedby_ag(
	struct xfs_scrub		*sc,
	struct xfs_btree_cur		*cur,
	const struct xfs_owner_info	*oinfo,
	xfs_filblks_t			*blocks)
{
	struct xchk_rmap_ownedby_info	sroi = {
		.oinfo			= oinfo,
		.blocks			= blocks,
	};

	*blocks = 0;
	return xfs_rmap_query_all(cur, xchk_count_rmap_ownedby_irec,
			&sroi);
}
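
/*
 * Illustrative sketch (not part of the original file): using the
 * counter above to cross-check a block count against the rmapbt.
 * Assumes sc->sa.rmap_cur was set up by xchk_ag_btcur_init() and that
 * "expected" came from some other metadata structure.
 */
#if 0
STATIC void
xchk_example_xref_ownedby(
	struct xfs_scrub		*sc,
	const struct xfs_owner_info	*oinfo,
	xfs_filblks_t			expected)
{
	xfs_filblks_t			blocks;
	int				error;

	error = xchk_count_rmap_ownedby_ag(sc, sc->sa.rmap_cur, oinfo,
			&blocks);
	if (!xchk_should_check_xref(sc, &error, &sc->sa.rmap_cur))
		return;
	if (blocks != expected)
		xchk_block_xref_set_corrupt(sc, sc->sa.agf_bp);
}
#endif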

/*
 * AG scrubbing
 *
 * These helpers facilitate locking an allocation group's header
 * buffers, setting up cursors for all btrees that are present, and
 * cleaning everything up once we're through.
 */

/* Decide if we want to return an AG header read failure. */
static inline bool
want_ag_read_header_failure(
	struct xfs_scrub	*sc,
	unsigned int		type)
{
	/* Return all AG header read failures when scanning btrees. */
	if (sc->sm->sm_type != XFS_SCRUB_TYPE_AGF &&
	    sc->sm->sm_type != XFS_SCRUB_TYPE_AGFL &&
	    sc->sm->sm_type != XFS_SCRUB_TYPE_AGI)
		return true;
	/*
	 * If we're scanning a given type of AG header, we only want to
	 * see read failures from that specific header.  We'd like the
	 * other headers to cross-check them, but this isn't required.
	 */
	if (sc->sm->sm_type == type)
		return true;
	return false;
}

/*
 * Grab the perag structure and all the headers for an AG.
 *
 * The headers should be released by xchk_ag_free, but as a fail safe we attach
 * all the buffers we grab to the scrub transaction so they'll all be freed
 * when we cancel it.  Returns ENOENT if we can't grab the perag structure.
 */
int
xchk_ag_read_headers(
	struct xfs_scrub	*sc,
	xfs_agnumber_t		agno,
	struct xchk_ag		*sa)
{
	struct xfs_mount	*mp = sc->mp;
	int			error;

	ASSERT(!sa->pag);
	sa->pag = xfs_perag_get(mp, agno);
	if (!sa->pag)
		return -ENOENT;

	error = xfs_ialloc_read_agi(mp, sc->tp, agno, &sa->agi_bp);
	if (error && want_ag_read_header_failure(sc, XFS_SCRUB_TYPE_AGI))
		return error;

	error = xfs_alloc_read_agf(mp, sc->tp, agno, 0, &sa->agf_bp);
	if (error && want_ag_read_header_failure(sc, XFS_SCRUB_TYPE_AGF))
		return error;

	error = xfs_alloc_read_agfl(mp, sc->tp, agno, &sa->agfl_bp);
	if (error && want_ag_read_header_failure(sc, XFS_SCRUB_TYPE_AGFL))
		return error;

	return 0;
}

/* Release all the AG btree cursors. */
void
xchk_ag_btcur_free(
	struct xchk_ag		*sa)
{
	if (sa->refc_cur)
		xfs_btree_del_cursor(sa->refc_cur, XFS_BTREE_ERROR);
	if (sa->rmap_cur)
		xfs_btree_del_cursor(sa->rmap_cur, XFS_BTREE_ERROR);
	if (sa->fino_cur)
		xfs_btree_del_cursor(sa->fino_cur, XFS_BTREE_ERROR);
	if (sa->ino_cur)
		xfs_btree_del_cursor(sa->ino_cur, XFS_BTREE_ERROR);
	if (sa->cnt_cur)
		xfs_btree_del_cursor(sa->cnt_cur, XFS_BTREE_ERROR);
	if (sa->bno_cur)
		xfs_btree_del_cursor(sa->bno_cur, XFS_BTREE_ERROR);

	sa->refc_cur = NULL;
	sa->rmap_cur = NULL;
	sa->fino_cur = NULL;
	sa->ino_cur = NULL;
	sa->bno_cur = NULL;
	sa->cnt_cur = NULL;
}

/* Initialize all the btree cursors for an AG. */
void
xchk_ag_btcur_init(
	struct xfs_scrub	*sc,
	struct xchk_ag		*sa)
{
	struct xfs_mount	*mp = sc->mp;

	if (sa->agf_bp &&
	    xchk_ag_btree_healthy_enough(sc, sa->pag, XFS_BTNUM_BNO)) {
		/* Set up a bnobt cursor for cross-referencing. */
		sa->bno_cur = xfs_allocbt_init_cursor(mp, sc->tp, sa->agf_bp,
				sa->pag, XFS_BTNUM_BNO);
	}

	if (sa->agf_bp &&
	    xchk_ag_btree_healthy_enough(sc, sa->pag, XFS_BTNUM_CNT)) {
		/* Set up a cntbt cursor for cross-referencing. */
		sa->cnt_cur = xfs_allocbt_init_cursor(mp, sc->tp, sa->agf_bp,
				sa->pag, XFS_BTNUM_CNT);
	}

	/* Set up an inobt cursor for cross-referencing. */
	if (sa->agi_bp &&
	    xchk_ag_btree_healthy_enough(sc, sa->pag, XFS_BTNUM_INO)) {
		sa->ino_cur = xfs_inobt_init_cursor(mp, sc->tp, sa->agi_bp,
				sa->pag, XFS_BTNUM_INO);
	}

	/* Set up a finobt cursor for cross-referencing. */
	if (sa->agi_bp && xfs_has_finobt(mp) &&
	    xchk_ag_btree_healthy_enough(sc, sa->pag, XFS_BTNUM_FINO)) {
		sa->fino_cur = xfs_inobt_init_cursor(mp, sc->tp, sa->agi_bp,
				sa->pag, XFS_BTNUM_FINO);
	}

	/* Set up a rmapbt cursor for cross-referencing. */
	if (sa->agf_bp && xfs_has_rmapbt(mp) &&
	    xchk_ag_btree_healthy_enough(sc, sa->pag, XFS_BTNUM_RMAP)) {
		sa->rmap_cur = xfs_rmapbt_init_cursor(mp, sc->tp, sa->agf_bp,
				sa->pag);
	}

	/* Set up a refcountbt cursor for cross-referencing. */
	if (sa->agf_bp && xfs_has_reflink(mp) &&
	    xchk_ag_btree_healthy_enough(sc, sa->pag, XFS_BTNUM_REFC)) {
		sa->refc_cur = xfs_refcountbt_init_cursor(mp, sc->tp,
				sa->agf_bp, sa->pag);
	}
}

/* Release the AG header context and btree cursors. */
void
xchk_ag_free(
	struct xfs_scrub	*sc,
	struct xchk_ag		*sa)
{
	xchk_ag_btcur_free(sa);
	if (sa->agfl_bp) {
		xfs_trans_brelse(sc->tp, sa->agfl_bp);
		sa->agfl_bp = NULL;
	}
	if (sa->agf_bp) {
		xfs_trans_brelse(sc->tp, sa->agf_bp);
		sa->agf_bp = NULL;
	}
	if (sa->agi_bp) {
		xfs_trans_brelse(sc->tp, sa->agi_bp);
		sa->agi_bp = NULL;
	}
	if (sa->pag) {
		xfs_perag_put(sa->pag);
		sa->pag = NULL;
	}
}

/*
 * For scrub, grab the perag structure, the AGI, and the AGF headers, in that
 * order.  Locking order requires us to get the AGI before the AGF.  We use the
 * transaction to avoid deadlocking on crosslinked metadata buffers; either the
 * caller passes one in (bmap scrub) or we have to create a transaction
 * ourselves.  Returns ENOENT if the perag struct cannot be grabbed.
 */
int
xchk_ag_init(
	struct xfs_scrub	*sc,
	xfs_agnumber_t		agno,
	struct xchk_ag		*sa)
{
	int			error;

	error = xchk_ag_read_headers(sc, agno, sa);
	if (error)
		return error;

	xchk_ag_btcur_init(sc, sa);
	return 0;
}
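
/*
 * Illustrative sketch (not part of the original file): the usual
 * pairing of the AG helpers above.  In practice scrub calls
 * xchk_ag_free() from its generic teardown path rather than inline.
 */
#if 0
STATIC int
xchk_example_scan_ag(
	struct xfs_scrub	*sc,
	xfs_agnumber_t		agno)
{
	int			error;

	error = xchk_ag_init(sc, agno, &sc->sa);
	if (error)
		return error;	/* -ENOENT if the perag can't be grabbed */

	/* ... cross-reference against sc->sa.bno_cur et al. ... */

	xchk_ag_free(sc, &sc->sa);
	return 0;
}
#endif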

/* Per-scrubber setup functions */

/*
 * Grab an empty transaction so that we can re-grab locked buffers if
 * one of our btrees turns out to be cyclic.
 *
 * If we're going to repair something, we need to ask for the largest possible
 * log reservation so that we can handle the worst case scenario for metadata
 * updates while rebuilding a metadata item.  We also need to reserve as many
 * blocks in the head transaction as we think we're going to need to rebuild
 * the metadata object.
 */
int
xchk_trans_alloc(
	struct xfs_scrub	*sc,
	uint			resblks)
{
	if (sc->sm->sm_flags & XFS_SCRUB_IFLAG_REPAIR)
		return xfs_trans_alloc(sc->mp, &M_RES(sc->mp)->tr_itruncate,
				resblks, 0, 0, &sc->tp);

	return xfs_trans_alloc_empty(sc->mp, &sc->tp);
}

/* Set us up with a transaction and an empty context. */
int
xchk_setup_fs(
	struct xfs_scrub	*sc)
{
	uint			resblks;

	resblks = xrep_calc_ag_resblks(sc);
	return xchk_trans_alloc(sc, resblks);
}

/* Set us up with AG headers and btree cursors. */
int
xchk_setup_ag_btree(
	struct xfs_scrub	*sc,
	bool			force_log)
{
	struct xfs_mount	*mp = sc->mp;
	int			error;

	/*
	 * If the caller asks us to checkpoint the log, do so.  This
	 * expensive operation should be performed infrequently and only
	 * as a last resort.  Any caller that sets force_log should
	 * document why they need to do so.
	 */
	if (force_log) {
		error = xchk_checkpoint_log(mp);
		if (error)
			return error;
	}

	error = xchk_setup_fs(sc);
	if (error)
		return error;

	return xchk_ag_init(sc, sc->sm->sm_agno, &sc->sa);
}

/* Push everything out of the log onto disk. */
int
xchk_checkpoint_log(
	struct xfs_mount	*mp)
{
	int			error;

	error = xfs_log_force(mp, XFS_LOG_SYNC);
	if (error)
		return error;
	xfs_ail_push_all_sync(mp->m_ail);
	return 0;
}

/*
 * Given an inode and the scrub control structure, grab either the
 * inode referenced in the control structure or the inode passed in.
 * The inode is not locked.
 */
int
xchk_get_inode(
	struct xfs_scrub	*sc)
{
	struct xfs_imap		imap;
	struct xfs_mount	*mp = sc->mp;
	struct xfs_inode	*ip_in = XFS_I(file_inode(sc->file));
	struct xfs_inode	*ip = NULL;
	int			error;

	/* We want to scan the inode we already had opened. */
	if (sc->sm->sm_ino == 0 || sc->sm->sm_ino == ip_in->i_ino) {
		sc->ip = ip_in;
		return 0;
	}

	/* Look up the inode, see if the generation number matches. */
	if (xfs_internal_inum(mp, sc->sm->sm_ino))
		return -ENOENT;
	error = xfs_iget(mp, NULL, sc->sm->sm_ino,
			XFS_IGET_UNTRUSTED | XFS_IGET_DONTCACHE, 0, &ip);
	switch (error) {
	case -ENOENT:
		/* Inode doesn't exist, just bail out. */
		return error;
	case 0:
		/* Got an inode, continue. */
		break;
	case -EINVAL:
		/*
		 * -EINVAL with IGET_UNTRUSTED could mean one of several
		 * things: userspace gave us an inode number that doesn't
		 * correspond to fs space, or doesn't have an inobt entry;
		 * or it could simply mean that the inode buffer failed the
		 * read verifiers.
		 *
		 * Try just the inode mapping lookup -- if it succeeds, then
		 * the inode buffer verifier failed and something needs fixing.
		 * Otherwise, we really couldn't find it so tell userspace
		 * that it no longer exists.
		 */
		error = xfs_imap(sc->mp, sc->tp, sc->sm->sm_ino, &imap,
				XFS_IGET_UNTRUSTED | XFS_IGET_DONTCACHE);
		if (error)
			return -ENOENT;
		error = -EFSCORRUPTED;
		fallthrough;
	default:
		trace_xchk_op_error(sc,
				XFS_INO_TO_AGNO(mp, sc->sm->sm_ino),
				XFS_INO_TO_AGBNO(mp, sc->sm->sm_ino),
				error, __return_address);
		return error;
	}
	if (VFS_I(ip)->i_generation != sc->sm->sm_gen) {
		xfs_irele(ip);
		return -ENOENT;
	}

	sc->ip = ip;
	return 0;
}

/* Set us up to scrub a file's contents. */
int
xchk_setup_inode_contents(
	struct xfs_scrub	*sc,
	unsigned int		resblks)
{
	int			error;

	error = xchk_get_inode(sc);
	if (error)
		return error;

	/* Got the inode, lock it and we're ready to go. */
	sc->ilock_flags = XFS_IOLOCK_EXCL | XFS_MMAPLOCK_EXCL;
	xfs_ilock(sc->ip, sc->ilock_flags);
	error = xchk_trans_alloc(sc, resblks);
	if (error)
		goto out;
	sc->ilock_flags |= XFS_ILOCK_EXCL;
	xfs_ilock(sc->ip, XFS_ILOCK_EXCL);

out:
	/* scrub teardown will unlock and release the inode for us */
	return error;
}

/*
 * Predicate that decides if we need to evaluate the cross-reference check.
 * If there was an error accessing the cross-reference btree, just delete
 * the cursor and skip the check.
 */
bool
xchk_should_check_xref(
	struct xfs_scrub	*sc,
	int			*error,
	struct xfs_btree_cur	**curpp)
{
	/* No point in xref if we already know we're corrupt. */
	if (xchk_skip_xref(sc->sm))
		return false;

	if (*error == 0)
		return true;

	if (curpp) {
		/* If we've already given up on xref, just bail out. */
		if (!*curpp)
			return false;

		/* xref error, delete cursor and bail out. */
		xfs_btree_del_cursor(*curpp, XFS_BTREE_ERROR);
		*curpp = NULL;
	}

	sc->sm->sm_flags |= XFS_SCRUB_OFLAG_XFAIL;
	trace_xchk_xref_error(sc, *error, __return_address);

	/*
	 * Errors encountered during cross-referencing with another
	 * data structure should not cause this scrubber to abort.
	 */
	*error = 0;
	return false;
}
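
/*
 * Illustrative sketch (not part of the original file): the canonical
 * cross-reference pattern built on xchk_should_check_xref().  Modeled
 * on the free-space check in scrub/alloc.c; treat it as a sketch, not
 * a verbatim copy.
 */
#if 0
STATIC void
xchk_example_xref_is_used(
	struct xfs_scrub	*sc,
	xfs_agblock_t		agbno,
	xfs_extlen_t		len)
{
	bool			is_freesp;
	int			error;

	error = xfs_alloc_has_record(sc->sa.bno_cur, agbno, len, &is_freesp);
	if (!xchk_should_check_xref(sc, &error, &sc->sa.bno_cur))
		return;		/* xref failure recorded, cursor deleted */
	if (is_freesp)
		xchk_block_xref_set_corrupt(sc, sc->sa.agf_bp);
}
#endif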

/* Run the structure verifiers on in-memory buffers to detect bad memory. */
void
xchk_buffer_recheck(
	struct xfs_scrub	*sc,
	struct xfs_buf		*bp)
{
	xfs_failaddr_t		fa;

	if (bp->b_ops == NULL) {
		xchk_block_set_corrupt(sc, bp);
		return;
	}
	if (bp->b_ops->verify_struct == NULL) {
		xchk_set_incomplete(sc);
		return;
	}
	fa = bp->b_ops->verify_struct(bp);
	if (!fa)
		return;
	sc->sm->sm_flags |= XFS_SCRUB_OFLAG_CORRUPT;
	trace_xchk_block_error(sc, xfs_buf_daddr(bp), fa);
}

/*
 * Scrub the attr/data forks of a metadata inode.  The metadata inode must be
 * pointed to by sc->ip and the ILOCK must be held.
 */
int
xchk_metadata_inode_forks(
	struct xfs_scrub	*sc)
{
	__u32			smtype;
	bool			shared;
	int			error;

	if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
		return 0;

	/* Metadata inodes don't live on the rt device. */
	if (sc->ip->i_diflags & XFS_DIFLAG_REALTIME) {
		xchk_ino_set_corrupt(sc, sc->ip->i_ino);
		return 0;
	}

	/* They should never participate in reflink. */
	if (xfs_is_reflink_inode(sc->ip)) {
		xchk_ino_set_corrupt(sc, sc->ip->i_ino);
		return 0;
	}

	/* They also should never have extended attributes. */
	if (xfs_inode_hasattr(sc->ip)) {
		xchk_ino_set_corrupt(sc, sc->ip->i_ino);
		return 0;
	}

	/* Invoke the data fork scrubber. */
	smtype = sc->sm->sm_type;
	sc->sm->sm_type = XFS_SCRUB_TYPE_BMBTD;
	error = xchk_bmap_data(sc);
	sc->sm->sm_type = smtype;
	if (error || (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT))
		return error;

	/* Look for incorrect shared blocks. */
	if (xfs_has_reflink(sc->mp)) {
		error = xfs_reflink_inode_has_shared_extents(sc->tp, sc->ip,
				&shared);
		if (!xchk_fblock_process_error(sc, XFS_DATA_FORK, 0,
				&error))
			return error;
		if (shared)
			xchk_ino_set_corrupt(sc, sc->ip->i_ino);
	}

	return error;
}

/*
 * Try to lock an inode in violation of the usual locking order rules.  For
 * example, trying to get the IOLOCK while in transaction context, or just
 * plain breaking AG-order or inode-order inode locking rules.  Either way,
 * the only way to avoid an ABBA deadlock is to use trylock and back off if
 * we can't.
 */
int
xchk_ilock_inverted(
	struct xfs_inode	*ip,
	uint			lock_mode)
{
	int			i;

	for (i = 0; i < 20; i++) {
		if (xfs_ilock_nowait(ip, lock_mode))
			return 0;
		delay(1);
	}
	return -EDEADLOCK;
}
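
/*
 * Illustrative sketch (not part of the original file): a caller backing
 * off when the trylock loop above gives up.
 */
#if 0
	error = xchk_ilock_inverted(sc->ip, XFS_IOLOCK_EXCL);
	if (error == -EDEADLOCK) {
		/* Unwind our locks/transaction and retry the scrub. */
		return error;
	}
	sc->ilock_flags |= XFS_IOLOCK_EXCL;
#endif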

/* Pause background reaping of resources. */
void
xchk_stop_reaping(
	struct xfs_scrub	*sc)
{
	sc->flags |= XCHK_REAPING_DISABLED;
	xfs_blockgc_stop(sc->mp);
	xfs_inodegc_stop(sc->mp);
}

/* Restart background reaping of resources. */
void
xchk_start_reaping(
	struct xfs_scrub	*sc)
{
	/*
	 * Readonly filesystems do not perform inactivation or speculative
	 * preallocation, so there's no need to restart the workers.
	 */
	if (!xfs_is_readonly(sc->mp)) {
		xfs_inodegc_start(sc->mp);
		xfs_blockgc_start(sc->mp);
	}
	sc->flags &= ~XCHK_REAPING_DISABLED;
}