xfs_super.c - cachepc-linux - Fork of AMDESE/linux with modifications for CachePC side-channel attack

	cachepc-linux Fork of AMDESE/linux with modifications for CachePC side-channel attack
	git clone https://git.sinitax.com/sinitax/cachepc-linux
	Log \| Files \| Refs \| README \| LICENSE \| sfeed.txt
xfs_super.c (60589B)
      1// SPDX-License-Identifier: GPL-2.0
      2/*
      3 * Copyright (c) 2000-2006 Silicon Graphics, Inc.
      4 * All Rights Reserved.
      5 */
      6
      7#include "xfs.h"
      8#include "xfs_shared.h"
      9#include "xfs_format.h"
     10#include "xfs_log_format.h"
     11#include "xfs_trans_resv.h"
     12#include "xfs_sb.h"
     13#include "xfs_mount.h"
     14#include "xfs_inode.h"
     15#include "xfs_btree.h"
     16#include "xfs_bmap.h"
     17#include "xfs_alloc.h"
     18#include "xfs_fsops.h"
     19#include "xfs_trans.h"
     20#include "xfs_buf_item.h"
     21#include "xfs_log.h"
     22#include "xfs_log_priv.h"
     23#include "xfs_dir2.h"
     24#include "xfs_extfree_item.h"
     25#include "xfs_mru_cache.h"
     26#include "xfs_inode_item.h"
     27#include "xfs_icache.h"
     28#include "xfs_trace.h"
     29#include "xfs_icreate_item.h"
     30#include "xfs_filestream.h"
     31#include "xfs_quota.h"
     32#include "xfs_sysfs.h"
     33#include "xfs_ondisk.h"
     34#include "xfs_rmap_item.h"
     35#include "xfs_refcount_item.h"
     36#include "xfs_bmap_item.h"
     37#include "xfs_reflink.h"
     38#include "xfs_pwork.h"
     39#include "xfs_ag.h"
     40#include "xfs_defer.h"
     41#include "xfs_attr_item.h"
     42#include "xfs_xattr.h"
     43
     44#include <linux/magic.h>
     45#include <linux/fs_context.h>
     46#include <linux/fs_parser.h>
     47
     48static const struct super_operations xfs_super_operations;
     49
     50static struct kset *xfs_kset;		/* top-level xfs sysfs dir */
     51#ifdef DEBUG
     52static struct xfs_kobj xfs_dbg_kobj;	/* global debug sysfs attrs */
     53#endif
     54
     #ifdef CONFIG_HOTPLUG_CPU
     /*
      * List of xfs_mount structures registered through xfs_mount_list_add().
      * Every update of the list is made under xfs_mount_list_lock.
      */
     static LIST_HEAD(xfs_mount_list);
     static DEFINE_SPINLOCK(xfs_mount_list_lock);

     /* Link @mp onto the head of xfs_mount_list. */
     static inline void
     xfs_mount_list_add(
     	struct xfs_mount	*mp)
     {
     	spin_lock(&xfs_mount_list_lock);
     	list_add(&mp->m_mount_list, &xfs_mount_list);
     	spin_unlock(&xfs_mount_list_lock);
     }

     /* Unlink @mp from xfs_mount_list. */
     static inline void
     xfs_mount_list_del(
     	struct xfs_mount	*mp)
     {
     	spin_lock(&xfs_mount_list_lock);
     	list_del(&mp->m_mount_list);
     	spin_unlock(&xfs_mount_list_lock);
     }
     #else /* !CONFIG_HOTPLUG_CPU */
     /* Without CPU hotplug support no list is kept; both helpers are no-ops. */
     static inline void xfs_mount_list_add(struct xfs_mount *mp) { }
     static inline void xfs_mount_list_del(struct xfs_mount *mp) { }
     #endif /* CONFIG_HOTPLUG_CPU */
     76
     /*
      * DAX policy selector.  The numeric values are spelled out in the comments
      * because the enumerators rely on implicit numbering after the first one.
      */
     enum xfs_dax_mode {
     	XFS_DAX_INODE = 0,	/* 0 */
     	XFS_DAX_ALWAYS,		/* 1 */
     	XFS_DAX_NEVER,		/* 2 */
     };
     82
     83static void
     84xfs_mount_set_dax_mode(
     85	struct xfs_mount	*mp,
     86	enum xfs_dax_mode	mode)
     87{
     88	switch (mode) {
     89	case XFS_DAX_INODE:
     90		mp->m_features &= ~(XFS_FEAT_DAX_ALWAYS | XFS_FEAT_DAX_NEVER);
     91		break;
     92	case XFS_DAX_ALWAYS:
     93		mp->m_features |= XFS_FEAT_DAX_ALWAYS;
     94		mp->m_features &= ~XFS_FEAT_DAX_NEVER;
     95		break;
     96	case XFS_DAX_NEVER:
     97		mp->m_features |= XFS_FEAT_DAX_NEVER;
     98		mp->m_features &= ~XFS_FEAT_DAX_ALWAYS;
     99		break;
    100	}
    101}
    102
    103static const struct constant_table dax_param_enums[] = {
    104	{"inode",	XFS_DAX_INODE },
    105	{"always",	XFS_DAX_ALWAYS },
    106	{"never",	XFS_DAX_NEVER },
    107	{}
    108};
    109
    /*
     * Identifiers for the table driven mount option parser.  The enumerators
     * are numbered implicitly from zero, so their order must not change.
     */
    enum {
    	/* log and device selection */
    	Opt_logbufs, Opt_logbsize, Opt_logdev, Opt_rtdev,
    	/* sync behaviour, alignment, uuid */
    	Opt_wsync, Opt_noalign, Opt_swalloc, Opt_sunit, Opt_swidth,
    	Opt_nouuid,
    	/* group id inheritance */
    	Opt_grpid, Opt_nogrpid, Opt_bsdgroups, Opt_sysvgroups,
    	/* allocation and inode behaviour */
    	Opt_allocsize, Opt_norecovery, Opt_inode64, Opt_inode32,
    	Opt_ikeep, Opt_noikeep, Opt_largeio, Opt_nolargeio,
    	Opt_attr2, Opt_noattr2, Opt_filestreams,
    	/* quota */
    	Opt_quota, Opt_noquota, Opt_usrquota, Opt_grpquota, Opt_prjquota,
    	Opt_uquota, Opt_gquota, Opt_pquota,
    	Opt_uqnoenforce, Opt_gqnoenforce, Opt_pqnoenforce, Opt_qnoenforce,
    	/* discard and dax */
    	Opt_discard, Opt_nodiscard, Opt_dax, Opt_dax_enum,
    };
    124
    125static const struct fs_parameter_spec xfs_fs_parameters[] = {
    126	fsparam_u32("logbufs",		Opt_logbufs),
    127	fsparam_string("logbsize",	Opt_logbsize),
    128	fsparam_string("logdev",	Opt_logdev),
    129	fsparam_string("rtdev",		Opt_rtdev),
    130	fsparam_flag("wsync",		Opt_wsync),
    131	fsparam_flag("noalign",		Opt_noalign),
    132	fsparam_flag("swalloc",		Opt_swalloc),
    133	fsparam_u32("sunit",		Opt_sunit),
    134	fsparam_u32("swidth",		Opt_swidth),
    135	fsparam_flag("nouuid",		Opt_nouuid),
    136	fsparam_flag("grpid",		Opt_grpid),
    137	fsparam_flag("nogrpid",		Opt_nogrpid),
    138	fsparam_flag("bsdgroups",	Opt_bsdgroups),
    139	fsparam_flag("sysvgroups",	Opt_sysvgroups),
    140	fsparam_string("allocsize",	Opt_allocsize),
    141	fsparam_flag("norecovery",	Opt_norecovery),
    142	fsparam_flag("inode64",		Opt_inode64),
    143	fsparam_flag("inode32",		Opt_inode32),
    144	fsparam_flag("ikeep",		Opt_ikeep),
    145	fsparam_flag("noikeep",		Opt_noikeep),
    146	fsparam_flag("largeio",		Opt_largeio),
    147	fsparam_flag("nolargeio",	Opt_nolargeio),
    148	fsparam_flag("attr2",		Opt_attr2),
    149	fsparam_flag("noattr2",		Opt_noattr2),
    150	fsparam_flag("filestreams",	Opt_filestreams),
    151	fsparam_flag("quota",		Opt_quota),
    152	fsparam_flag("noquota",		Opt_noquota),
    153	fsparam_flag("usrquota",	Opt_usrquota),
    154	fsparam_flag("grpquota",	Opt_grpquota),
    155	fsparam_flag("prjquota",	Opt_prjquota),
    156	fsparam_flag("uquota",		Opt_uquota),
    157	fsparam_flag("gquota",		Opt_gquota),
    158	fsparam_flag("pquota",		Opt_pquota),
    159	fsparam_flag("uqnoenforce",	Opt_uqnoenforce),
    160	fsparam_flag("gqnoenforce",	Opt_gqnoenforce),
    161	fsparam_flag("pqnoenforce",	Opt_pqnoenforce),
    162	fsparam_flag("qnoenforce",	Opt_qnoenforce),
    163	fsparam_flag("discard",		Opt_discard),
    164	fsparam_flag("nodiscard",	Opt_nodiscard),
    165	fsparam_flag("dax",		Opt_dax),
    166	fsparam_enum("dax",		Opt_dax_enum, dax_param_enums),
    167	{}
    168};
    169
    /* Pairs a feature flag mask with the option text printed when it is set. */
    struct proc_xfs_info {
    	uint64_t	flag;	/* mask tested against the mount's features */
    	char		*str;	/* text emitted for the option */
    };
    174
    175static int
    176xfs_fs_show_options(
    177	struct seq_file		*m,
    178	struct dentry		*root)
    179{
    180	static struct proc_xfs_info xfs_info_set[] = {
    181		/* the few simple ones we can get from the mount struct */
    182		{ XFS_FEAT_IKEEP,		",ikeep" },
    183		{ XFS_FEAT_WSYNC,		",wsync" },
    184		{ XFS_FEAT_NOALIGN,		",noalign" },
    185		{ XFS_FEAT_SWALLOC,		",swalloc" },
    186		{ XFS_FEAT_NOUUID,		",nouuid" },
    187		{ XFS_FEAT_NORECOVERY,		",norecovery" },
    188		{ XFS_FEAT_ATTR2,		",attr2" },
    189		{ XFS_FEAT_FILESTREAMS,		",filestreams" },
    190		{ XFS_FEAT_GRPID,		",grpid" },
    191		{ XFS_FEAT_DISCARD,		",discard" },
    192		{ XFS_FEAT_LARGE_IOSIZE,	",largeio" },
    193		{ XFS_FEAT_DAX_ALWAYS,		",dax=always" },
    194		{ XFS_FEAT_DAX_NEVER,		",dax=never" },
    195		{ 0, NULL }
    196	};
    197	struct xfs_mount	*mp = XFS_M(root->d_sb);
    198	struct proc_xfs_info	*xfs_infop;
    199
    200	for (xfs_infop = xfs_info_set; xfs_infop->flag; xfs_infop++) {
    201		if (mp->m_features & xfs_infop->flag)
    202			seq_puts(m, xfs_infop->str);
    203	}
    204
    205	seq_printf(m, ",inode%d", xfs_has_small_inums(mp) ? 32 : 64);
    206
    207	if (xfs_has_allocsize(mp))
    208		seq_printf(m, ",allocsize=%dk",
    209			   (1 << mp->m_allocsize_log) >> 10);
    210
    211	if (mp->m_logbufs > 0)
    212		seq_printf(m, ",logbufs=%d", mp->m_logbufs);
    213	if (mp->m_logbsize > 0)
    214		seq_printf(m, ",logbsize=%dk", mp->m_logbsize >> 10);
    215
    216	if (mp->m_logname)
    217		seq_show_option(m, "logdev", mp->m_logname);
    218	if (mp->m_rtname)
    219		seq_show_option(m, "rtdev", mp->m_rtname);
    220
    221	if (mp->m_dalign > 0)
    222		seq_printf(m, ",sunit=%d",
    223				(int)XFS_FSB_TO_BB(mp, mp->m_dalign));
    224	if (mp->m_swidth > 0)
    225		seq_printf(m, ",swidth=%d",
    226				(int)XFS_FSB_TO_BB(mp, mp->m_swidth));
    227
    228	if (mp->m_qflags & XFS_UQUOTA_ENFD)
    229		seq_puts(m, ",usrquota");
    230	else if (mp->m_qflags & XFS_UQUOTA_ACCT)
    231		seq_puts(m, ",uqnoenforce");
    232
    233	if (mp->m_qflags & XFS_PQUOTA_ENFD)
    234		seq_puts(m, ",prjquota");
    235	else if (mp->m_qflags & XFS_PQUOTA_ACCT)
    236		seq_puts(m, ",pqnoenforce");
    237
    238	if (mp->m_qflags & XFS_GQUOTA_ENFD)
    239		seq_puts(m, ",grpquota");
    240	else if (mp->m_qflags & XFS_GQUOTA_ACCT)
    241		seq_puts(m, ",gqnoenforce");
    242
    243	if (!(mp->m_qflags & XFS_ALL_QUOTA_ACCT))
    244		seq_puts(m, ",noquota");
    245
    246	return 0;
    247}
    248
    249/*
    250 * Set parameters for inode allocation heuristics, taking into account
    251 * filesystem size and inode32/inode64 mount options; i.e. specifically
    252 * whether or not XFS_FEAT_SMALL_INUMS is set.
    253 *
    254 * Inode allocation patterns are altered only if inode32 is requested
    255 * (XFS_FEAT_SMALL_INUMS), and the filesystem is sufficiently large.
    256 * If altered, XFS_OPSTATE_INODE32 is set as well.
    257 *
    258 * An agcount independent of that in the mount structure is provided
    259 * because in the growfs case, mp->m_sb.sb_agcount is not yet updated
    260 * to the potentially higher ag count.
    261 *
    262 * Returns the maximum AG index which may contain inodes.
    263 */
    264xfs_agnumber_t
    265xfs_set_inode_alloc(
    266	struct xfs_mount *mp,
    267	xfs_agnumber_t	agcount)
    268{
    269	xfs_agnumber_t	index;
    270	xfs_agnumber_t	maxagi = 0;
    271	xfs_sb_t	*sbp = &mp->m_sb;
    272	xfs_agnumber_t	max_metadata;
    273	xfs_agino_t	agino;
    274	xfs_ino_t	ino;
    275
    276	/*
    277	 * Calculate how much should be reserved for inodes to meet
    278	 * the max inode percentage.  Used only for inode32.
    279	 */
    280	if (M_IGEO(mp)->maxicount) {
    281		uint64_t	icount;
    282
    283		icount = sbp->sb_dblocks * sbp->sb_imax_pct;
    284		do_div(icount, 100);
    285		icount += sbp->sb_agblocks - 1;
    286		do_div(icount, sbp->sb_agblocks);
    287		max_metadata = icount;
    288	} else {
    289		max_metadata = agcount;
    290	}
    291
    292	/* Get the last possible inode in the filesystem */
    293	agino =	XFS_AGB_TO_AGINO(mp, sbp->sb_agblocks - 1);
    294	ino = XFS_AGINO_TO_INO(mp, agcount - 1, agino);
    295
    296	/*
    297	 * If user asked for no more than 32-bit inodes, and the fs is
    298	 * sufficiently large, set XFS_OPSTATE_INODE32 if we must alter
    299	 * the allocator to accommodate the request.
    300	 */
    301	if (xfs_has_small_inums(mp) && ino > XFS_MAXINUMBER_32)
    302		set_bit(XFS_OPSTATE_INODE32, &mp->m_opstate);
    303	else
    304		clear_bit(XFS_OPSTATE_INODE32, &mp->m_opstate);
    305
    306	for (index = 0; index < agcount; index++) {
    307		struct xfs_perag	*pag;
    308
    309		ino = XFS_AGINO_TO_INO(mp, index, agino);
    310
    311		pag = xfs_perag_get(mp, index);
    312
    313		if (xfs_is_inode32(mp)) {
    314			if (ino > XFS_MAXINUMBER_32) {
    315				pag->pagi_inodeok = 0;
    316				pag->pagf_metadata = 0;
    317			} else {
    318				pag->pagi_inodeok = 1;
    319				maxagi++;
    320				if (index < max_metadata)
    321					pag->pagf_metadata = 1;
    322				else
    323					pag->pagf_metadata = 0;
    324			}
    325		} else {
    326			pag->pagi_inodeok = 1;
    327			pag->pagf_metadata = 0;
    328		}
    329
    330		xfs_perag_put(pag);
    331	}
    332
    333	return xfs_is_inode32(mp) ? maxagi : agcount;
    334}
    335
    336static int
    337xfs_setup_dax_always(
    338	struct xfs_mount	*mp)
    339{
    340	if (!mp->m_ddev_targp->bt_daxdev &&
    341	    (!mp->m_rtdev_targp || !mp->m_rtdev_targp->bt_daxdev)) {
    342		xfs_alert(mp,
    343			"DAX unsupported by block device. Turning off DAX.");
    344		goto disable_dax;
    345	}
    346
    347	if (mp->m_super->s_blocksize != PAGE_SIZE) {
    348		xfs_alert(mp,
    349			"DAX not supported for blocksize. Turning off DAX.");
    350		goto disable_dax;
    351	}
    352
    353	if (xfs_has_reflink(mp)) {
    354		xfs_alert(mp, "DAX and reflink cannot be used together!");
    355		return -EINVAL;
    356	}
    357
    358	xfs_warn(mp, "DAX enabled. Warning: EXPERIMENTAL, use at your own risk");
    359	return 0;
    360
    361disable_dax:
    362	xfs_mount_set_dax_mode(mp, XFS_DAX_NEVER);
    363	return 0;
    364}
    365
    366STATIC int
    367xfs_blkdev_get(
    368	xfs_mount_t		*mp,
    369	const char		*name,
    370	struct block_device	**bdevp)
    371{
    372	int			error = 0;
    373
    374	*bdevp = blkdev_get_by_path(name, FMODE_READ|FMODE_WRITE|FMODE_EXCL,
    375				    mp);
    376	if (IS_ERR(*bdevp)) {
    377		error = PTR_ERR(*bdevp);
    378		xfs_warn(mp, "Invalid device [%s], error=%d", name, error);
    379	}
    380
    381	return error;
    382}
    383
    384STATIC void
    385xfs_blkdev_put(
    386	struct block_device	*bdev)
    387{
    388	if (bdev)
    389		blkdev_put(bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL);
    390}
    391
    392STATIC void
    393xfs_close_devices(
    394	struct xfs_mount	*mp)
    395{
    396	if (mp->m_logdev_targp && mp->m_logdev_targp != mp->m_ddev_targp) {
    397		struct block_device *logdev = mp->m_logdev_targp->bt_bdev;
    398
    399		xfs_free_buftarg(mp->m_logdev_targp);
    400		xfs_blkdev_put(logdev);
    401	}
    402	if (mp->m_rtdev_targp) {
    403		struct block_device *rtdev = mp->m_rtdev_targp->bt_bdev;
    404
    405		xfs_free_buftarg(mp->m_rtdev_targp);
    406		xfs_blkdev_put(rtdev);
    407	}
    408	xfs_free_buftarg(mp->m_ddev_targp);
    409}
    410
    411/*
    412 * The file system configurations are:
    413 *	(1) device (partition) with data and internal log
    414 *	(2) logical volume with data and log subvolumes.
    415 *	(3) logical volume with data, log, and realtime subvolumes.
    416 *
    417 * We only have to handle opening the log and realtime volumes here if
    418 * they are present.  The data subvolume has already been opened by
    419 * get_sb_bdev() and is stored in sb->s_bdev.
    420 */
    421STATIC int
    422xfs_open_devices(
    423	struct xfs_mount	*mp)
    424{
    425	struct block_device	*ddev = mp->m_super->s_bdev;
    426	struct block_device	*logdev = NULL, *rtdev = NULL;
    427	int			error;
    428
    429	/*
    430	 * Open real time and log devices - order is important.
    431	 */
    432	if (mp->m_logname) {
    433		error = xfs_blkdev_get(mp, mp->m_logname, &logdev);
    434		if (error)
    435			return error;
    436	}
    437
    438	if (mp->m_rtname) {
    439		error = xfs_blkdev_get(mp, mp->m_rtname, &rtdev);
    440		if (error)
    441			goto out_close_logdev;
    442
    443		if (rtdev == ddev || rtdev == logdev) {
    444			xfs_warn(mp,
    445	"Cannot mount filesystem with identical rtdev and ddev/logdev.");
    446			error = -EINVAL;
    447			goto out_close_rtdev;
    448		}
    449	}
    450
    451	/*
    452	 * Setup xfs_mount buffer target pointers
    453	 */
    454	error = -ENOMEM;
    455	mp->m_ddev_targp = xfs_alloc_buftarg(mp, ddev);
    456	if (!mp->m_ddev_targp)
    457		goto out_close_rtdev;
    458
    459	if (rtdev) {
    460		mp->m_rtdev_targp = xfs_alloc_buftarg(mp, rtdev);
    461		if (!mp->m_rtdev_targp)
    462			goto out_free_ddev_targ;
    463	}
    464
    465	if (logdev && logdev != ddev) {
    466		mp->m_logdev_targp = xfs_alloc_buftarg(mp, logdev);
    467		if (!mp->m_logdev_targp)
    468			goto out_free_rtdev_targ;
    469	} else {
    470		mp->m_logdev_targp = mp->m_ddev_targp;
    471	}
    472
    473	return 0;
    474
    475 out_free_rtdev_targ:
    476	if (mp->m_rtdev_targp)
    477		xfs_free_buftarg(mp->m_rtdev_targp);
    478 out_free_ddev_targ:
    479	xfs_free_buftarg(mp->m_ddev_targp);
    480 out_close_rtdev:
    481	xfs_blkdev_put(rtdev);
    482 out_close_logdev:
    483	if (logdev && logdev != ddev)
    484		xfs_blkdev_put(logdev);
    485	return error;
    486}
    487
    488/*
    489 * Setup xfs_mount buffer target pointers based on superblock
    490 */
    491STATIC int
    492xfs_setup_devices(
    493	struct xfs_mount	*mp)
    494{
    495	int			error;
    496
    497	error = xfs_setsize_buftarg(mp->m_ddev_targp, mp->m_sb.sb_sectsize);
    498	if (error)
    499		return error;
    500
    501	if (mp->m_logdev_targp && mp->m_logdev_targp != mp->m_ddev_targp) {
    502		unsigned int	log_sector_size = BBSIZE;
    503
    504		if (xfs_has_sector(mp))
    505			log_sector_size = mp->m_sb.sb_logsectsize;
    506		error = xfs_setsize_buftarg(mp->m_logdev_targp,
    507					    log_sector_size);
    508		if (error)
    509			return error;
    510	}
    511	if (mp->m_rtdev_targp) {
    512		error = xfs_setsize_buftarg(mp->m_rtdev_targp,
    513					    mp->m_sb.sb_sectsize);
    514		if (error)
    515			return error;
    516	}
    517
    518	return 0;
    519}
    520
    521STATIC int
    522xfs_init_mount_workqueues(
    523	struct xfs_mount	*mp)
    524{
    525	mp->m_buf_workqueue = alloc_workqueue("xfs-buf/%s",
    526			XFS_WQFLAGS(WQ_FREEZABLE | WQ_MEM_RECLAIM),
    527			1, mp->m_super->s_id);
    528	if (!mp->m_buf_workqueue)
    529		goto out;
    530
    531	mp->m_unwritten_workqueue = alloc_workqueue("xfs-conv/%s",
    532			XFS_WQFLAGS(WQ_FREEZABLE | WQ_MEM_RECLAIM),
    533			0, mp->m_super->s_id);
    534	if (!mp->m_unwritten_workqueue)
    535		goto out_destroy_buf;
    536
    537	mp->m_reclaim_workqueue = alloc_workqueue("xfs-reclaim/%s",
    538			XFS_WQFLAGS(WQ_FREEZABLE | WQ_MEM_RECLAIM),
    539			0, mp->m_super->s_id);
    540	if (!mp->m_reclaim_workqueue)
    541		goto out_destroy_unwritten;
    542
    543	mp->m_blockgc_wq = alloc_workqueue("xfs-blockgc/%s",
    544			XFS_WQFLAGS(WQ_UNBOUND | WQ_FREEZABLE | WQ_MEM_RECLAIM),
    545			0, mp->m_super->s_id);
    546	if (!mp->m_blockgc_wq)
    547		goto out_destroy_reclaim;
    548
    549	mp->m_inodegc_wq = alloc_workqueue("xfs-inodegc/%s",
    550			XFS_WQFLAGS(WQ_FREEZABLE | WQ_MEM_RECLAIM),
    551			1, mp->m_super->s_id);
    552	if (!mp->m_inodegc_wq)
    553		goto out_destroy_blockgc;
    554
    555	mp->m_sync_workqueue = alloc_workqueue("xfs-sync/%s",
    556			XFS_WQFLAGS(WQ_FREEZABLE), 0, mp->m_super->s_id);
    557	if (!mp->m_sync_workqueue)
    558		goto out_destroy_inodegc;
    559
    560	return 0;
    561
    562out_destroy_inodegc:
    563	destroy_workqueue(mp->m_inodegc_wq);
    564out_destroy_blockgc:
    565	destroy_workqueue(mp->m_blockgc_wq);
    566out_destroy_reclaim:
    567	destroy_workqueue(mp->m_reclaim_workqueue);
    568out_destroy_unwritten:
    569	destroy_workqueue(mp->m_unwritten_workqueue);
    570out_destroy_buf:
    571	destroy_workqueue(mp->m_buf_workqueue);
    572out:
    573	return -ENOMEM;
    574}
    575
    576STATIC void
    577xfs_destroy_mount_workqueues(
    578	struct xfs_mount	*mp)
    579{
    580	destroy_workqueue(mp->m_sync_workqueue);
    581	destroy_workqueue(mp->m_blockgc_wq);
    582	destroy_workqueue(mp->m_inodegc_wq);
    583	destroy_workqueue(mp->m_reclaim_workqueue);
    584	destroy_workqueue(mp->m_unwritten_workqueue);
    585	destroy_workqueue(mp->m_buf_workqueue);
    586}
    587
    588static void
    589xfs_flush_inodes_worker(
    590	struct work_struct	*work)
    591{
    592	struct xfs_mount	*mp = container_of(work, struct xfs_mount,
    593						   m_flush_inodes_work);
    594	struct super_block	*sb = mp->m_super;
    595
    596	if (down_read_trylock(&sb->s_umount)) {
    597		sync_inodes_sb(sb);
    598		up_read(&sb->s_umount);
    599	}
    600}
    601
    602/*
    603 * Flush all dirty data to disk. Must not be called while holding an XFS_ILOCK
    604 * or a page lock. We use sync_inodes_sb() here to ensure we block while waiting
    605 * for IO to complete so that we effectively throttle multiple callers to the
    606 * rate at which IO is completing.
    607 */
    608void
    609xfs_flush_inodes(
    610	struct xfs_mount	*mp)
    611{
    612	/*
    613	 * If flush_work() returns true then that means we waited for a flush
    614	 * which was already in progress.  Don't bother running another scan.
    615	 */
    616	if (flush_work(&mp->m_flush_inodes_work))
    617		return;
    618
    619	queue_work(mp->m_sync_workqueue, &mp->m_flush_inodes_work);
    620	flush_work(&mp->m_flush_inodes_work);
    621}
    622
    623/* Catch misguided souls that try to use this interface on XFS */
    624STATIC struct inode *
    625xfs_fs_alloc_inode(
    626	struct super_block	*sb)
    627{
    628	BUG();
    629	return NULL;
    630}
    631
    632/*
    633 * Now that the generic code is guaranteed not to be accessing
    634 * the linux inode, we can inactivate and reclaim the inode.
    635 */
    636STATIC void
    637xfs_fs_destroy_inode(
    638	struct inode		*inode)
    639{
    640	struct xfs_inode	*ip = XFS_I(inode);
    641
    642	trace_xfs_destroy_inode(ip);
    643
    644	ASSERT(!rwsem_is_locked(&inode->i_rwsem));
    645	XFS_STATS_INC(ip->i_mount, vn_rele);
    646	XFS_STATS_INC(ip->i_mount, vn_remove);
    647	xfs_inode_mark_reclaimable(ip);
    648}
    649
    650static void
    651xfs_fs_dirty_inode(
    652	struct inode			*inode,
    653	int				flag)
    654{
    655	struct xfs_inode		*ip = XFS_I(inode);
    656	struct xfs_mount		*mp = ip->i_mount;
    657	struct xfs_trans		*tp;
    658
    659	if (!(inode->i_sb->s_flags & SB_LAZYTIME))
    660		return;
    661	if (flag != I_DIRTY_SYNC || !(inode->i_state & I_DIRTY_TIME))
    662		return;
    663
    664	if (xfs_trans_alloc(mp, &M_RES(mp)->tr_fsyncts, 0, 0, 0, &tp))
    665		return;
    666	xfs_ilock(ip, XFS_ILOCK_EXCL);
    667	xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
    668	xfs_trans_log_inode(tp, ip, XFS_ILOG_TIMESTAMP);
    669	xfs_trans_commit(tp);
    670}
    671
    672/*
    673 * Slab object creation initialisation for the XFS inode.
    674 * This covers only the idempotent fields in the XFS inode;
    675 * all other fields need to be initialised on allocation
    676 * from the slab. This avoids the need to repeatedly initialise
    677 * fields in the xfs inode that left in the initialise state
    678 * when freeing the inode.
    679 */
    680STATIC void
    681xfs_fs_inode_init_once(
    682	void			*inode)
    683{
    684	struct xfs_inode	*ip = inode;
    685
    686	memset(ip, 0, sizeof(struct xfs_inode));
    687
    688	/* vfs inode */
    689	inode_init_once(VFS_I(ip));
    690
    691	/* xfs inode */
    692	atomic_set(&ip->i_pincount, 0);
    693	spin_lock_init(&ip->i_flags_lock);
    694
    695	mrlock_init(&ip->i_lock, MRLOCK_ALLOW_EQUAL_PRI|MRLOCK_BARRIER,
    696		     "xfsino", ip->i_ino);
    697}
    698
    699/*
    700 * We do an unlocked check for XFS_IDONTCACHE here because we are already
    701 * serialised against cache hits here via the inode->i_lock and igrab() in
    702 * xfs_iget_cache_hit(). Hence a lookup that might clear this flag will not be
    703 * racing with us, and it avoids needing to grab a spinlock here for every inode
    704 * we drop the final reference on.
    705 */
    706STATIC int
    707xfs_fs_drop_inode(
    708	struct inode		*inode)
    709{
    710	struct xfs_inode	*ip = XFS_I(inode);
    711
    712	/*
    713	 * If this unlinked inode is in the middle of recovery, don't
    714	 * drop the inode just yet; log recovery will take care of
    715	 * that.  See the comment for this inode flag.
    716	 */
    717	if (ip->i_flags & XFS_IRECOVERY) {
    718		ASSERT(xlog_recovery_needed(ip->i_mount->m_log));
    719		return 0;
    720	}
    721
    722	return generic_drop_inode(inode);
    723}
    724
    725static void
    726xfs_mount_free(
    727	struct xfs_mount	*mp)
    728{
    729	kfree(mp->m_rtname);
    730	kfree(mp->m_logname);
    731	kmem_free(mp);
    732}
    733
    734STATIC int
    735xfs_fs_sync_fs(
    736	struct super_block	*sb,
    737	int			wait)
    738{
    739	struct xfs_mount	*mp = XFS_M(sb);
    740	int			error;
    741
    742	trace_xfs_fs_sync_fs(mp, __return_address);
    743
    744	/*
    745	 * Doing anything during the async pass would be counterproductive.
    746	 */
    747	if (!wait)
    748		return 0;
    749
    750	error = xfs_log_force(mp, XFS_LOG_SYNC);
    751	if (error)
    752		return error;
    753
    754	if (laptop_mode) {
    755		/*
    756		 * The disk must be active because we're syncing.
    757		 * We schedule log work now (now that the disk is
    758		 * active) instead of later (when it might not be).
    759		 */
    760		flush_delayed_work(&mp->m_log->l_work);
    761	}
    762
    763	/*
    764	 * If we are called with page faults frozen out, it means we are about
    765	 * to freeze the transaction subsystem. Take the opportunity to shut
    766	 * down inodegc because once SB_FREEZE_FS is set it's too late to
    767	 * prevent inactivation races with freeze. The fs doesn't get called
    768	 * again by the freezing process until after SB_FREEZE_FS has been set,
    769	 * so it's now or never.  Same logic applies to speculative allocation
    770	 * garbage collection.
    771	 *
    772	 * We don't care if this is a normal syncfs call that does this or
    773	 * freeze that does this - we can run this multiple times without issue
    774	 * and we won't race with a restart because a restart can only occur
    775	 * when the state is either SB_FREEZE_FS or SB_FREEZE_COMPLETE.
    776	 */
    777	if (sb->s_writers.frozen == SB_FREEZE_PAGEFAULT) {
    778		xfs_inodegc_stop(mp);
    779		xfs_blockgc_stop(mp);
    780	}
    781
    782	return 0;
    783}
    784
    785STATIC int
    786xfs_fs_statfs(
    787	struct dentry		*dentry,
    788	struct kstatfs		*statp)
    789{
    790	struct xfs_mount	*mp = XFS_M(dentry->d_sb);
    791	xfs_sb_t		*sbp = &mp->m_sb;
    792	struct xfs_inode	*ip = XFS_I(d_inode(dentry));
    793	uint64_t		fakeinos, id;
    794	uint64_t		icount;
    795	uint64_t		ifree;
    796	uint64_t		fdblocks;
    797	xfs_extlen_t		lsize;
    798	int64_t			ffree;
    799
    800	/*
    801	 * Expedite background inodegc but don't wait. We do not want to block
    802	 * here waiting hours for a billion extent file to be truncated.
    803	 */
    804	xfs_inodegc_push(mp);
    805
    806	statp->f_type = XFS_SUPER_MAGIC;
    807	statp->f_namelen = MAXNAMELEN - 1;
    808
    809	id = huge_encode_dev(mp->m_ddev_targp->bt_dev);
    810	statp->f_fsid = u64_to_fsid(id);
    811
    812	icount = percpu_counter_sum(&mp->m_icount);
    813	ifree = percpu_counter_sum(&mp->m_ifree);
    814	fdblocks = percpu_counter_sum(&mp->m_fdblocks);
    815
    816	spin_lock(&mp->m_sb_lock);
    817	statp->f_bsize = sbp->sb_blocksize;
    818	lsize = sbp->sb_logstart ? sbp->sb_logblocks : 0;
    819	statp->f_blocks = sbp->sb_dblocks - lsize;
    820	spin_unlock(&mp->m_sb_lock);
    821
    822	/* make sure statp->f_bfree does not underflow */
    823	statp->f_bfree = max_t(int64_t, 0,
    824				fdblocks - xfs_fdblocks_unavailable(mp));
    825	statp->f_bavail = statp->f_bfree;
    826
    827	fakeinos = XFS_FSB_TO_INO(mp, statp->f_bfree);
    828	statp->f_files = min(icount + fakeinos, (uint64_t)XFS_MAXINUMBER);
    829	if (M_IGEO(mp)->maxicount)
    830		statp->f_files = min_t(typeof(statp->f_files),
    831					statp->f_files,
    832					M_IGEO(mp)->maxicount);
    833
    834	/* If sb_icount overshot maxicount, report actual allocation */
    835	statp->f_files = max_t(typeof(statp->f_files),
    836					statp->f_files,
    837					sbp->sb_icount);
    838
    839	/* make sure statp->f_ffree does not underflow */
    840	ffree = statp->f_files - (icount - ifree);
    841	statp->f_ffree = max_t(int64_t, ffree, 0);
    842
    843
    844	if ((ip->i_diflags & XFS_DIFLAG_PROJINHERIT) &&
    845	    ((mp->m_qflags & (XFS_PQUOTA_ACCT|XFS_PQUOTA_ENFD))) ==
    846			      (XFS_PQUOTA_ACCT|XFS_PQUOTA_ENFD))
    847		xfs_qm_statvfs(ip, statp);
    848
    849	if (XFS_IS_REALTIME_MOUNT(mp) &&
    850	    (ip->i_diflags & (XFS_DIFLAG_RTINHERIT | XFS_DIFLAG_REALTIME))) {
    851		s64	freertx;
    852
    853		statp->f_blocks = sbp->sb_rblocks;
    854		freertx = percpu_counter_sum_positive(&mp->m_frextents);
    855		statp->f_bavail = statp->f_bfree = freertx * sbp->sb_rextsize;
    856	}
    857
    858	return 0;
    859}
    860
    861STATIC void
    862xfs_save_resvblks(struct xfs_mount *mp)
    863{
    864	uint64_t resblks = 0;
    865
    866	mp->m_resblks_save = mp->m_resblks;
    867	xfs_reserve_blocks(mp, &resblks, NULL);
    868}
    869
    870STATIC void
    871xfs_restore_resvblks(struct xfs_mount *mp)
    872{
    873	uint64_t resblks;
    874
    875	if (mp->m_resblks_save) {
    876		resblks = mp->m_resblks_save;
    877		mp->m_resblks_save = 0;
    878	} else
    879		resblks = xfs_default_resblks(mp);
    880
    881	xfs_reserve_blocks(mp, &resblks, NULL);
    882}
    883
    884/*
    885 * Second stage of a freeze. The data is already frozen so we only
    886 * need to take care of the metadata. Once that's done sync the superblock
    887 * to the log to dirty it in case of a crash while frozen. This ensures that we
    888 * will recover the unlinked inode lists on the next mount.
    889 */
    890STATIC int
    891xfs_fs_freeze(
    892	struct super_block	*sb)
    893{
    894	struct xfs_mount	*mp = XFS_M(sb);
    895	unsigned int		flags;
    896	int			ret;
    897
    898	/*
    899	 * The filesystem is now frozen far enough that memory reclaim
    900	 * cannot safely operate on the filesystem. Hence we need to
    901	 * set a GFP_NOFS context here to avoid recursion deadlocks.
    902	 */
    903	flags = memalloc_nofs_save();
    904	xfs_save_resvblks(mp);
    905	ret = xfs_log_quiesce(mp);
    906	memalloc_nofs_restore(flags);
    907
    908	/*
    909	 * For read-write filesystems, we need to restart the inodegc on error
    910	 * because we stopped it at SB_FREEZE_PAGEFAULT level and a thaw is not
    911	 * going to be run to restart it now.  We are at SB_FREEZE_FS level
    912	 * here, so we can restart safely without racing with a stop in
    913	 * xfs_fs_sync_fs().
    914	 */
    915	if (ret && !xfs_is_readonly(mp)) {
    916		xfs_blockgc_start(mp);
    917		xfs_inodegc_start(mp);
    918	}
    919
    920	return ret;
    921}
    922
    923STATIC int
    924xfs_fs_unfreeze(
    925	struct super_block	*sb)
    926{
    927	struct xfs_mount	*mp = XFS_M(sb);
    928
    929	xfs_restore_resvblks(mp);
    930	xfs_log_work_queue(mp);
    931
    932	/*
    933	 * Don't reactivate the inodegc worker on a readonly filesystem because
    934	 * inodes are sent directly to reclaim.  Don't reactivate the blockgc
    935	 * worker because there are no speculative preallocations on a readonly
    936	 * filesystem.
    937	 */
    938	if (!xfs_is_readonly(mp)) {
    939		xfs_blockgc_start(mp);
    940		xfs_inodegc_start(mp);
    941	}
    942
    943	return 0;
    944}
    945
    946/*
    947 * This function fills in xfs_mount_t fields based on mount args.
    948 * Note: the superblock _has_ now been read in.
    949 */
    950STATIC int
    951xfs_finish_flags(
    952	struct xfs_mount	*mp)
    953{
    954	/* Fail a mount where the logbuf is smaller than the log stripe */
    955	if (xfs_has_logv2(mp)) {
    956		if (mp->m_logbsize <= 0 &&
    957		    mp->m_sb.sb_logsunit > XLOG_BIG_RECORD_BSIZE) {
    958			mp->m_logbsize = mp->m_sb.sb_logsunit;
    959		} else if (mp->m_logbsize > 0 &&
    960			   mp->m_logbsize < mp->m_sb.sb_logsunit) {
    961			xfs_warn(mp,
    962		"logbuf size must be greater than or equal to log stripe size");
    963			return -EINVAL;
    964		}
    965	} else {
    966		/* Fail a mount if the logbuf is larger than 32K */
    967		if (mp->m_logbsize > XLOG_BIG_RECORD_BSIZE) {
    968			xfs_warn(mp,
    969		"logbuf size for version 1 logs must be 16K or 32K");
    970			return -EINVAL;
    971		}
    972	}
    973
    974	/*
    975	 * V5 filesystems always use attr2 format for attributes.
    976	 */
    977	if (xfs_has_crc(mp) && xfs_has_noattr2(mp)) {
    978		xfs_warn(mp, "Cannot mount a V5 filesystem as noattr2. "
    979			     "attr2 is always enabled for V5 filesystems.");
    980		return -EINVAL;
    981	}
    982
    983	/*
    984	 * prohibit r/w mounts of read-only filesystems
    985	 */
    986	if ((mp->m_sb.sb_flags & XFS_SBF_READONLY) && !xfs_is_readonly(mp)) {
    987		xfs_warn(mp,
    988			"cannot mount a read-only filesystem as read-write");
    989		return -EROFS;
    990	}
    991
    992	if ((mp->m_qflags & XFS_GQUOTA_ACCT) &&
    993	    (mp->m_qflags & XFS_PQUOTA_ACCT) &&
    994	    !xfs_has_pquotino(mp)) {
    995		xfs_warn(mp,
    996		  "Super block does not support project and group quota together");
    997		return -EINVAL;
    998	}
    999
   1000	return 0;
   1001}
   1002
   1003static int
   1004xfs_init_percpu_counters(
   1005	struct xfs_mount	*mp)
   1006{
   1007	int		error;
   1008
   1009	error = percpu_counter_init(&mp->m_icount, 0, GFP_KERNEL);
   1010	if (error)
   1011		return -ENOMEM;
   1012
   1013	error = percpu_counter_init(&mp->m_ifree, 0, GFP_KERNEL);
   1014	if (error)
   1015		goto free_icount;
   1016
   1017	error = percpu_counter_init(&mp->m_fdblocks, 0, GFP_KERNEL);
   1018	if (error)
   1019		goto free_ifree;
   1020
   1021	error = percpu_counter_init(&mp->m_delalloc_blks, 0, GFP_KERNEL);
   1022	if (error)
   1023		goto free_fdblocks;
   1024
   1025	error = percpu_counter_init(&mp->m_frextents, 0, GFP_KERNEL);
   1026	if (error)
   1027		goto free_delalloc;
   1028
   1029	return 0;
   1030
   1031free_delalloc:
   1032	percpu_counter_destroy(&mp->m_delalloc_blks);
   1033free_fdblocks:
   1034	percpu_counter_destroy(&mp->m_fdblocks);
   1035free_ifree:
   1036	percpu_counter_destroy(&mp->m_ifree);
   1037free_icount:
   1038	percpu_counter_destroy(&mp->m_icount);
   1039	return -ENOMEM;
   1040}
   1041
   1042void
   1043xfs_reinit_percpu_counters(
   1044	struct xfs_mount	*mp)
   1045{
   1046	percpu_counter_set(&mp->m_icount, mp->m_sb.sb_icount);
   1047	percpu_counter_set(&mp->m_ifree, mp->m_sb.sb_ifree);
   1048	percpu_counter_set(&mp->m_fdblocks, mp->m_sb.sb_fdblocks);
   1049	percpu_counter_set(&mp->m_frextents, mp->m_sb.sb_frextents);
   1050}
   1051
   1052static void
   1053xfs_destroy_percpu_counters(
   1054	struct xfs_mount	*mp)
   1055{
   1056	percpu_counter_destroy(&mp->m_icount);
   1057	percpu_counter_destroy(&mp->m_ifree);
   1058	percpu_counter_destroy(&mp->m_fdblocks);
   1059	ASSERT(xfs_is_shutdown(mp) ||
   1060	       percpu_counter_sum(&mp->m_delalloc_blks) == 0);
   1061	percpu_counter_destroy(&mp->m_delalloc_blks);
   1062	percpu_counter_destroy(&mp->m_frextents);
   1063}
   1064
   1065static int
   1066xfs_inodegc_init_percpu(
   1067	struct xfs_mount	*mp)
   1068{
   1069	struct xfs_inodegc	*gc;
   1070	int			cpu;
   1071
   1072	mp->m_inodegc = alloc_percpu(struct xfs_inodegc);
   1073	if (!mp->m_inodegc)
   1074		return -ENOMEM;
   1075
   1076	for_each_possible_cpu(cpu) {
   1077		gc = per_cpu_ptr(mp->m_inodegc, cpu);
   1078		init_llist_head(&gc->list);
   1079		gc->items = 0;
   1080		INIT_DELAYED_WORK(&gc->work, xfs_inodegc_worker);
   1081	}
   1082	return 0;
   1083}
   1084
   1085static void
   1086xfs_inodegc_free_percpu(
   1087	struct xfs_mount	*mp)
   1088{
   1089	if (!mp->m_inodegc)
   1090		return;
   1091	free_percpu(mp->m_inodegc);
   1092}
   1093
/*
 * ->put_super handler: unmount the filesystem and release everything that
 * hangs off the xfs_mount.  The teardown calls below run in the same order
 * as the out_unmount error path of xfs_fs_fill_super().
 */
static void
xfs_fs_put_super(
	struct super_block	*sb)
{
	struct xfs_mount	*mp = XFS_M(sb);

	/* if ->fill_super failed, we have no mount to tear down */
	if (!sb->s_fs_info)
		return;

	xfs_notice(mp, "Unmounting Filesystem");
	xfs_filestream_unmount(mp);
	xfs_unmountfs(mp);

	xfs_freesb(mp);
	free_percpu(mp->m_stats.xs_stats);
	xfs_mount_list_del(mp);
	xfs_inodegc_free_percpu(mp);
	xfs_destroy_percpu_counters(mp);
	xfs_destroy_mount_workqueues(mp);
	xfs_close_devices(mp);

	/* Detach the mount from the superblock before freeing it. */
	sb->s_fs_info = NULL;
	xfs_mount_free(mp);
}
   1119
   1120static long
   1121xfs_fs_nr_cached_objects(
   1122	struct super_block	*sb,
   1123	struct shrink_control	*sc)
   1124{
   1125	/* Paranoia: catch incorrect calls during mount setup or teardown */
   1126	if (WARN_ON_ONCE(!sb->s_fs_info))
   1127		return 0;
   1128	return xfs_reclaim_inodes_count(XFS_M(sb));
   1129}
   1130
   1131static long
   1132xfs_fs_free_cached_objects(
   1133	struct super_block	*sb,
   1134	struct shrink_control	*sc)
   1135{
   1136	return xfs_reclaim_inodes_nr(XFS_M(sb), sc->nr_to_scan);
   1137}
   1138
/*
 * Superblock operations table; installed as sb->s_op by xfs_fs_fill_super().
 */
static const struct super_operations xfs_super_operations = {
	/* inode lifecycle */
	.alloc_inode		= xfs_fs_alloc_inode,
	.destroy_inode		= xfs_fs_destroy_inode,
	.dirty_inode		= xfs_fs_dirty_inode,
	.drop_inode		= xfs_fs_drop_inode,
	/* unmount, sync and freeze/thaw */
	.put_super		= xfs_fs_put_super,
	.sync_fs		= xfs_fs_sync_fs,
	.freeze_fs		= xfs_fs_freeze,
	.unfreeze_fs		= xfs_fs_unfreeze,
	/* reporting */
	.statfs			= xfs_fs_statfs,
	.show_options		= xfs_fs_show_options,
	/* cached object counting and reclaim (both take a shrink_control) */
	.nr_cached_objects	= xfs_fs_nr_cached_objects,
	.free_cached_objects	= xfs_fs_free_cached_objects,
};
   1153
   1154static int
   1155suffix_kstrtoint(
   1156	const char	*s,
   1157	unsigned int	base,
   1158	int		*res)
   1159{
   1160	int		last, shift_left_factor = 0, _res;
   1161	char		*value;
   1162	int		ret = 0;
   1163
   1164	value = kstrdup(s, GFP_KERNEL);
   1165	if (!value)
   1166		return -ENOMEM;
   1167
   1168	last = strlen(value) - 1;
   1169	if (value[last] == 'K' || value[last] == 'k') {
   1170		shift_left_factor = 10;
   1171		value[last] = '\0';
   1172	}
   1173	if (value[last] == 'M' || value[last] == 'm') {
   1174		shift_left_factor = 20;
   1175		value[last] = '\0';
   1176	}
   1177	if (value[last] == 'G' || value[last] == 'g') {
   1178		shift_left_factor = 30;
   1179		value[last] = '\0';
   1180	}
   1181
   1182	if (kstrtoint(value, base, &_res))
   1183		ret = -EINVAL;
   1184	kfree(value);
   1185	*res = _res << shift_left_factor;
   1186	return ret;
   1187}
   1188
   1189static inline void
   1190xfs_fs_warn_deprecated(
   1191	struct fs_context	*fc,
   1192	struct fs_parameter	*param,
   1193	uint64_t		flag,
   1194	bool			value)
   1195{
   1196	/* Don't print the warning if reconfiguring and current mount point
   1197	 * already had the flag set
   1198	 */
   1199	if ((fc->purpose & FS_CONTEXT_FOR_RECONFIGURE) &&
   1200            !!(XFS_M(fc->root->d_sb)->m_features & flag) == value)
   1201		return;
   1202	xfs_warn(fc->s_fs_info, "%s mount option is deprecated.", param->key);
   1203}
   1204
   1205/*
   1206 * Set mount state from a mount option.
   1207 *
   1208 * NOTE: mp->m_super is NULL here!
   1209 */
   1210static int
   1211xfs_fs_parse_param(
   1212	struct fs_context	*fc,
   1213	struct fs_parameter	*param)
   1214{
   1215	struct xfs_mount	*parsing_mp = fc->s_fs_info;
   1216	struct fs_parse_result	result;
   1217	int			size = 0;
   1218	int			opt;
   1219
   1220	opt = fs_parse(fc, xfs_fs_parameters, param, &result);
   1221	if (opt < 0)
   1222		return opt;
   1223
   1224	switch (opt) {
   1225	case Opt_logbufs:
   1226		parsing_mp->m_logbufs = result.uint_32;
   1227		return 0;
   1228	case Opt_logbsize:
   1229		if (suffix_kstrtoint(param->string, 10, &parsing_mp->m_logbsize))
   1230			return -EINVAL;
   1231		return 0;
   1232	case Opt_logdev:
   1233		kfree(parsing_mp->m_logname);
   1234		parsing_mp->m_logname = kstrdup(param->string, GFP_KERNEL);
   1235		if (!parsing_mp->m_logname)
   1236			return -ENOMEM;
   1237		return 0;
   1238	case Opt_rtdev:
   1239		kfree(parsing_mp->m_rtname);
   1240		parsing_mp->m_rtname = kstrdup(param->string, GFP_KERNEL);
   1241		if (!parsing_mp->m_rtname)
   1242			return -ENOMEM;
   1243		return 0;
   1244	case Opt_allocsize:
   1245		if (suffix_kstrtoint(param->string, 10, &size))
   1246			return -EINVAL;
   1247		parsing_mp->m_allocsize_log = ffs(size) - 1;
   1248		parsing_mp->m_features |= XFS_FEAT_ALLOCSIZE;
   1249		return 0;
   1250	case Opt_grpid:
   1251	case Opt_bsdgroups:
   1252		parsing_mp->m_features |= XFS_FEAT_GRPID;
   1253		return 0;
   1254	case Opt_nogrpid:
   1255	case Opt_sysvgroups:
   1256		parsing_mp->m_features &= ~XFS_FEAT_GRPID;
   1257		return 0;
   1258	case Opt_wsync:
   1259		parsing_mp->m_features |= XFS_FEAT_WSYNC;
   1260		return 0;
   1261	case Opt_norecovery:
   1262		parsing_mp->m_features |= XFS_FEAT_NORECOVERY;
   1263		return 0;
   1264	case Opt_noalign:
   1265		parsing_mp->m_features |= XFS_FEAT_NOALIGN;
   1266		return 0;
   1267	case Opt_swalloc:
   1268		parsing_mp->m_features |= XFS_FEAT_SWALLOC;
   1269		return 0;
   1270	case Opt_sunit:
   1271		parsing_mp->m_dalign = result.uint_32;
   1272		return 0;
   1273	case Opt_swidth:
   1274		parsing_mp->m_swidth = result.uint_32;
   1275		return 0;
   1276	case Opt_inode32:
   1277		parsing_mp->m_features |= XFS_FEAT_SMALL_INUMS;
   1278		return 0;
   1279	case Opt_inode64:
   1280		parsing_mp->m_features &= ~XFS_FEAT_SMALL_INUMS;
   1281		return 0;
   1282	case Opt_nouuid:
   1283		parsing_mp->m_features |= XFS_FEAT_NOUUID;
   1284		return 0;
   1285	case Opt_largeio:
   1286		parsing_mp->m_features |= XFS_FEAT_LARGE_IOSIZE;
   1287		return 0;
   1288	case Opt_nolargeio:
   1289		parsing_mp->m_features &= ~XFS_FEAT_LARGE_IOSIZE;
   1290		return 0;
   1291	case Opt_filestreams:
   1292		parsing_mp->m_features |= XFS_FEAT_FILESTREAMS;
   1293		return 0;
   1294	case Opt_noquota:
   1295		parsing_mp->m_qflags &= ~XFS_ALL_QUOTA_ACCT;
   1296		parsing_mp->m_qflags &= ~XFS_ALL_QUOTA_ENFD;
   1297		return 0;
   1298	case Opt_quota:
   1299	case Opt_uquota:
   1300	case Opt_usrquota:
   1301		parsing_mp->m_qflags |= (XFS_UQUOTA_ACCT | XFS_UQUOTA_ENFD);
   1302		return 0;
   1303	case Opt_qnoenforce:
   1304	case Opt_uqnoenforce:
   1305		parsing_mp->m_qflags |= XFS_UQUOTA_ACCT;
   1306		parsing_mp->m_qflags &= ~XFS_UQUOTA_ENFD;
   1307		return 0;
   1308	case Opt_pquota:
   1309	case Opt_prjquota:
   1310		parsing_mp->m_qflags |= (XFS_PQUOTA_ACCT | XFS_PQUOTA_ENFD);
   1311		return 0;
   1312	case Opt_pqnoenforce:
   1313		parsing_mp->m_qflags |= XFS_PQUOTA_ACCT;
   1314		parsing_mp->m_qflags &= ~XFS_PQUOTA_ENFD;
   1315		return 0;
   1316	case Opt_gquota:
   1317	case Opt_grpquota:
   1318		parsing_mp->m_qflags |= (XFS_GQUOTA_ACCT | XFS_GQUOTA_ENFD);
   1319		return 0;
   1320	case Opt_gqnoenforce:
   1321		parsing_mp->m_qflags |= XFS_GQUOTA_ACCT;
   1322		parsing_mp->m_qflags &= ~XFS_GQUOTA_ENFD;
   1323		return 0;
   1324	case Opt_discard:
   1325		parsing_mp->m_features |= XFS_FEAT_DISCARD;
   1326		return 0;
   1327	case Opt_nodiscard:
   1328		parsing_mp->m_features &= ~XFS_FEAT_DISCARD;
   1329		return 0;
   1330#ifdef CONFIG_FS_DAX
   1331	case Opt_dax:
   1332		xfs_mount_set_dax_mode(parsing_mp, XFS_DAX_ALWAYS);
   1333		return 0;
   1334	case Opt_dax_enum:
   1335		xfs_mount_set_dax_mode(parsing_mp, result.uint_32);
   1336		return 0;
   1337#endif
   1338	/* Following mount options will be removed in September 2025 */
   1339	case Opt_ikeep:
   1340		xfs_fs_warn_deprecated(fc, param, XFS_FEAT_IKEEP, true);
   1341		parsing_mp->m_features |= XFS_FEAT_IKEEP;
   1342		return 0;
   1343	case Opt_noikeep:
   1344		xfs_fs_warn_deprecated(fc, param, XFS_FEAT_IKEEP, false);
   1345		parsing_mp->m_features &= ~XFS_FEAT_IKEEP;
   1346		return 0;
   1347	case Opt_attr2:
   1348		xfs_fs_warn_deprecated(fc, param, XFS_FEAT_ATTR2, true);
   1349		parsing_mp->m_features |= XFS_FEAT_ATTR2;
   1350		return 0;
   1351	case Opt_noattr2:
   1352		xfs_fs_warn_deprecated(fc, param, XFS_FEAT_NOATTR2, true);
   1353		parsing_mp->m_features |= XFS_FEAT_NOATTR2;
   1354		return 0;
   1355	default:
   1356		xfs_warn(parsing_mp, "unknown mount option [%s].", param->key);
   1357		return -EINVAL;
   1358	}
   1359
   1360	return 0;
   1361}
   1362
   1363static int
   1364xfs_fs_validate_params(
   1365	struct xfs_mount	*mp)
   1366{
   1367	/* No recovery flag requires a read-only mount */
   1368	if (xfs_has_norecovery(mp) && !xfs_is_readonly(mp)) {
   1369		xfs_warn(mp, "no-recovery mounts must be read-only.");
   1370		return -EINVAL;
   1371	}
   1372
   1373	/*
   1374	 * We have not read the superblock at this point, so only the attr2
   1375	 * mount option can set the attr2 feature by this stage.
   1376	 */
   1377	if (xfs_has_attr2(mp) && xfs_has_noattr2(mp)) {
   1378		xfs_warn(mp, "attr2 and noattr2 cannot both be specified.");
   1379		return -EINVAL;
   1380	}
   1381
   1382
   1383	if (xfs_has_noalign(mp) && (mp->m_dalign || mp->m_swidth)) {
   1384		xfs_warn(mp,
   1385	"sunit and swidth options incompatible with the noalign option");
   1386		return -EINVAL;
   1387	}
   1388
   1389	if (!IS_ENABLED(CONFIG_XFS_QUOTA) && mp->m_qflags != 0) {
   1390		xfs_warn(mp, "quota support not available in this kernel.");
   1391		return -EINVAL;
   1392	}
   1393
   1394	if ((mp->m_dalign && !mp->m_swidth) ||
   1395	    (!mp->m_dalign && mp->m_swidth)) {
   1396		xfs_warn(mp, "sunit and swidth must be specified together");
   1397		return -EINVAL;
   1398	}
   1399
   1400	if (mp->m_dalign && (mp->m_swidth % mp->m_dalign != 0)) {
   1401		xfs_warn(mp,
   1402	"stripe width (%d) must be a multiple of the stripe unit (%d)",
   1403			mp->m_swidth, mp->m_dalign);
   1404		return -EINVAL;
   1405	}
   1406
   1407	if (mp->m_logbufs != -1 &&
   1408	    mp->m_logbufs != 0 &&
   1409	    (mp->m_logbufs < XLOG_MIN_ICLOGS ||
   1410	     mp->m_logbufs > XLOG_MAX_ICLOGS)) {
   1411		xfs_warn(mp, "invalid logbufs value: %d [not %d-%d]",
   1412			mp->m_logbufs, XLOG_MIN_ICLOGS, XLOG_MAX_ICLOGS);
   1413		return -EINVAL;
   1414	}
   1415
   1416	if (mp->m_logbsize != -1 &&
   1417	    mp->m_logbsize !=  0 &&
   1418	    (mp->m_logbsize < XLOG_MIN_RECORD_BSIZE ||
   1419	     mp->m_logbsize > XLOG_MAX_RECORD_BSIZE ||
   1420	     !is_power_of_2(mp->m_logbsize))) {
   1421		xfs_warn(mp,
   1422			"invalid logbufsize: %d [not 16k,32k,64k,128k or 256k]",
   1423			mp->m_logbsize);
   1424		return -EINVAL;
   1425	}
   1426
   1427	if (xfs_has_allocsize(mp) &&
   1428	    (mp->m_allocsize_log > XFS_MAX_IO_LOG ||
   1429	     mp->m_allocsize_log < XFS_MIN_IO_LOG)) {
   1430		xfs_warn(mp, "invalid log iosize: %d [not %d-%d]",
   1431			mp->m_allocsize_log, XFS_MIN_IO_LOG, XFS_MAX_IO_LOG);
   1432		return -EINVAL;
   1433	}
   1434
   1435	return 0;
   1436}
   1437
/*
 * Fill in a new superblock for an XFS mount.
 *
 * In order: validate the parsed mount options, install the VFS operation
 * tables, open the devices, allocate the per-mount workqueues / percpu
 * counters / inodegc structures / stats, read the on-disk superblock and
 * check it against what this kernel supports, set the remaining VFS
 * superblock fields, run xfs_mountfs() and finally create the root dentry.
 *
 * Returns 0 on success or a negative errno.  On failure everything acquired
 * so far is released through the out_* labels (in reverse order of
 * acquisition), sb->s_fs_info is cleared and the xfs_mount is freed.
 */
static int
xfs_fs_fill_super(
	struct super_block	*sb,
	struct fs_context	*fc)
{
	struct xfs_mount	*mp = sb->s_fs_info;
	struct inode		*root;
	int			flags = 0, error;

	mp->m_super = sb;

	error = xfs_fs_validate_params(mp);
	if (error)
		goto out_free_names;

	sb_min_blocksize(sb, BBSIZE);
	sb->s_xattr = xfs_xattr_handlers;
	sb->s_export_op = &xfs_export_operations;
#ifdef CONFIG_XFS_QUOTA
	sb->s_qcop = &xfs_quotactl_operations;
	sb->s_quota_types = QTYPE_MASK_USR | QTYPE_MASK_GRP | QTYPE_MASK_PRJ;
#endif
	sb->s_op = &xfs_super_operations;

	/*
	 * Delay mount work if the debug hook is set. This is debug
	 * instrumentation to coordinate simulation of xfs mount failures with
	 * VFS superblock operations.
	 */
	if (xfs_globals.mount_delay) {
		xfs_notice(mp, "Delaying mount for %d seconds.",
			xfs_globals.mount_delay);
		msleep(xfs_globals.mount_delay * 1000);
	}

	/* A silent mount request is passed on to xfs_readsb() as QUIET. */
	if (fc->sb_flags & SB_SILENT)
		flags |= XFS_MFSI_QUIET;

	error = xfs_open_devices(mp);
	if (error)
		goto out_free_names;

	error = xfs_init_mount_workqueues(mp);
	if (error)
		goto out_close_devices;

	error = xfs_init_percpu_counters(mp);
	if (error)
		goto out_destroy_workqueues;

	error = xfs_inodegc_init_percpu(mp);
	if (error)
		goto out_destroy_counters;

	/*
	 * All percpu data structures requiring cleanup when a cpu goes offline
	 * must be allocated before adding this @mp to the cpu-dead handler's
	 * mount list.
	 */
	xfs_mount_list_add(mp);

	/* Allocate stats memory before we do operations that might use it */
	mp->m_stats.xs_stats = alloc_percpu(struct xfsstats);
	if (!mp->m_stats.xs_stats) {
		error = -ENOMEM;
		goto out_destroy_inodegc;
	}

	error = xfs_readsb(mp, flags);
	if (error)
		goto out_free_stats;

	error = xfs_finish_flags(mp);
	if (error)
		goto out_free_sb;

	error = xfs_setup_devices(mp);
	if (error)
		goto out_free_sb;

	/* V4 support is undergoing deprecation. */
	if (!xfs_has_crc(mp)) {
#ifdef CONFIG_XFS_SUPPORT_V4
		xfs_warn_once(mp,
	"Deprecated V4 format (crc=0) will not be supported after September 2030.");
#else
		xfs_warn(mp,
	"Deprecated V4 format (crc=0) not supported by kernel.");
		error = -EINVAL;
		goto out_free_sb;
#endif
	}

	/* Filesystem claims it needs repair, so refuse the mount. */
	if (xfs_has_needsrepair(mp)) {
		xfs_warn(mp, "Filesystem needs repair.  Please run xfs_repair.");
		error = -EFSCORRUPTED;
		goto out_free_sb;
	}

	/*
	 * Don't touch the filesystem if a user tool thinks it owns the primary
	 * superblock.  mkfs doesn't clear the flag from secondary supers, so
	 * we don't check them at all.
	 */
	if (mp->m_sb.sb_inprogress) {
		xfs_warn(mp, "Offline file system operation in progress!");
		error = -EFSCORRUPTED;
		goto out_free_sb;
	}

	/*
	 * Until this is fixed only page-sized or smaller data blocks work.
	 */
	if (mp->m_sb.sb_blocksize > PAGE_SIZE) {
		xfs_warn(mp,
		"File system with blocksize %d bytes. "
		"Only pagesize (%ld) or less will currently work.",
				mp->m_sb.sb_blocksize, PAGE_SIZE);
		error = -ENOSYS;
		goto out_free_sb;
	}

	/* Ensure this filesystem fits in the page cache limits */
	if (xfs_sb_validate_fsb_count(&mp->m_sb, mp->m_sb.sb_dblocks) ||
	    xfs_sb_validate_fsb_count(&mp->m_sb, mp->m_sb.sb_rblocks)) {
		xfs_warn(mp,
		"file system too large to be mounted on this system.");
		error = -EFBIG;
		goto out_free_sb;
	}

	/*
	 * XFS block mappings use 54 bits to store the logical block offset.
	 * This should suffice to handle the maximum file size that the VFS
	 * supports (currently 2^63 bytes on 64-bit and ULONG_MAX << PAGE_SHIFT
	 * bytes on 32-bit), but as XFS and VFS have gotten the s_maxbytes
	 * calculation wrong on 32-bit kernels in the past, we'll add a WARN_ON
	 * to check this assertion.
	 *
	 * Avoid integer overflow by comparing the maximum bmbt offset to the
	 * maximum pagecache offset in units of fs blocks.
	 */
	if (!xfs_verify_fileoff(mp, XFS_B_TO_FSBT(mp, MAX_LFS_FILESIZE))) {
		xfs_warn(mp,
"MAX_LFS_FILESIZE block offset (%llu) exceeds extent map maximum (%llu)!",
			 XFS_B_TO_FSBT(mp, MAX_LFS_FILESIZE),
			 XFS_MAX_FILEOFF);
		error = -EINVAL;
		goto out_free_sb;
	}

	error = xfs_filestream_mount(mp);
	if (error)
		goto out_free_sb;

	/*
	 * we must configure the block size in the superblock before we run the
	 * full mount process as the mount process can lookup and cache inodes.
	 */
	sb->s_magic = XFS_SUPER_MAGIC;
	sb->s_blocksize = mp->m_sb.sb_blocksize;
	sb->s_blocksize_bits = ffs(sb->s_blocksize) - 1;
	sb->s_maxbytes = MAX_LFS_FILESIZE;
	sb->s_max_links = XFS_MAXLINK;
	sb->s_time_gran = 1;
	/* The supported timestamp range depends on the bigtime feature. */
	if (xfs_has_bigtime(mp)) {
		sb->s_time_min = xfs_bigtime_to_unix(XFS_BIGTIME_TIME_MIN);
		sb->s_time_max = xfs_bigtime_to_unix(XFS_BIGTIME_TIME_MAX);
	} else {
		sb->s_time_min = XFS_LEGACY_TIME_MIN;
		sb->s_time_max = XFS_LEGACY_TIME_MAX;
	}
	trace_xfs_inode_timestamp_range(mp, sb->s_time_min, sb->s_time_max);
	sb->s_iflags |= SB_I_CGROUPWB;

	set_posix_acl_flag(sb);

	/* version 5 superblocks support inode version counters. */
	if (xfs_has_crc(mp))
		sb->s_flags |= SB_I_VERSION;

	if (xfs_has_dax_always(mp)) {
		error = xfs_setup_dax_always(mp);
		if (error)
			goto out_filestream_unmount;
	}

	/* Not fatal: warn and carry on with discard switched off. */
	if (xfs_has_discard(mp) && !bdev_max_discard_sectors(sb->s_bdev)) {
		xfs_warn(mp,
	"mounting with \"discard\" option, but the device does not support discard");
		mp->m_features &= ~XFS_FEAT_DISCARD;
	}

	if (xfs_has_reflink(mp)) {
		if (mp->m_sb.sb_rblocks) {
			xfs_alert(mp,
	"reflink not compatible with realtime device!");
			error = -EINVAL;
			goto out_filestream_unmount;
		}

		if (xfs_globals.always_cow) {
			xfs_info(mp, "using DEBUG-only always_cow mode.");
			mp->m_always_cow = true;
		}
	}

	if (xfs_has_rmapbt(mp) && mp->m_sb.sb_rblocks) {
		xfs_alert(mp,
	"reverse mapping btree not compatible with realtime device!");
		error = -EINVAL;
		goto out_filestream_unmount;
	}

	if (xfs_has_large_extent_counts(mp))
		xfs_warn(mp,
	"EXPERIMENTAL Large extent counts feature in use. Use at your own risk!");

	error = xfs_mountfs(mp);
	if (error)
		goto out_filestream_unmount;

	/* From here on a failure must also undo xfs_mountfs(): out_unmount. */
	root = igrab(VFS_I(mp->m_rootip));
	if (!root) {
		error = -ENOENT;
		goto out_unmount;
	}
	sb->s_root = d_make_root(root);
	if (!sb->s_root) {
		error = -ENOMEM;
		goto out_unmount;
	}

	return 0;

	/*
	 * Error unwinding.  Each label releases one resource and falls through
	 * to the labels for everything acquired before it.
	 */
 out_filestream_unmount:
	xfs_filestream_unmount(mp);
 out_free_sb:
	xfs_freesb(mp);
 out_free_stats:
	free_percpu(mp->m_stats.xs_stats);
 out_destroy_inodegc:
	xfs_mount_list_del(mp);
	xfs_inodegc_free_percpu(mp);
 out_destroy_counters:
	xfs_destroy_percpu_counters(mp);
 out_destroy_workqueues:
	xfs_destroy_mount_workqueues(mp);
 out_close_devices:
	xfs_close_devices(mp);
 out_free_names:
	sb->s_fs_info = NULL;
	xfs_mount_free(mp);
	return error;

	/* Failure after xfs_mountfs() succeeded: unmount, then rejoin above. */
 out_unmount:
	xfs_filestream_unmount(mp);
	xfs_unmountfs(mp);
	goto out_free_sb;
}
   1699
/*
 * ->get_tree handler: hand off to get_tree_bdev() with xfs_fs_fill_super()
 * as the fill callback, and return its result.
 */
static int
xfs_fs_get_tree(
	struct fs_context	*fc)
{
	return get_tree_bdev(fc, xfs_fs_fill_super);
}
   1706
/*
 * Switch a read-only mount to read-write.
 *
 * Returns 0 on success, -EINVAL if the transition is prohibited (norecovery
 * mount, or a V5 superblock with unknown ro-compat features), or the error
 * from syncing the superblock or reserving the per-AG blocks.  An -ENOSPC
 * result from the per-AG reservation is not treated as a failure.
 *
 * NOTE(review): the read-only opstate bit is cleared before the fallible
 * steps below and is not set again on their error paths.
 */
static int
xfs_remount_rw(
	struct xfs_mount	*mp)
{
	struct xfs_sb		*sbp = &mp->m_sb;
	int error;

	if (xfs_has_norecovery(mp)) {
		xfs_warn(mp,
			"ro->rw transition prohibited on norecovery mount");
		return -EINVAL;
	}

	if (xfs_sb_is_v5(sbp) &&
	    xfs_sb_has_ro_compat_feature(sbp, XFS_SB_FEAT_RO_COMPAT_UNKNOWN)) {
		xfs_warn(mp,
	"ro->rw transition prohibited on unknown (0x%x) ro-compat filesystem",
			(sbp->sb_features_ro_compat &
				XFS_SB_FEAT_RO_COMPAT_UNKNOWN));
		return -EINVAL;
	}

	clear_bit(XFS_OPSTATE_READONLY, &mp->m_opstate);

	/*
	 * If this is the first remount to writeable state we might have some
	 * superblock changes to update.
	 */
	if (mp->m_update_sb) {
		error = xfs_sync_sb(mp, false);
		if (error) {
			xfs_warn(mp, "failed to write sb changes");
			return error;
		}
		mp->m_update_sb = false;
	}

	/*
	 * Fill out the reserve pool if it is empty. Use the stashed value if
	 * it is non-zero, otherwise go with the default.
	 */
	xfs_restore_resvblks(mp);
	xfs_log_work_queue(mp);
	xfs_blockgc_start(mp);

	/* Create the per-AG metadata reservation pool. */
	error = xfs_fs_reserve_ag_blocks(mp);
	if (error && error != -ENOSPC)
		return error;

	/* Re-enable the background inode inactivation worker. */
	xfs_inodegc_start(mp);

	return 0;
}
   1762
/*
 * Switch a read-write mount to read-only.
 *
 * Returns 0 on success or a negative errno.  A failure to sync the
 * filesystem is returned as-is; a failure while freeing speculative
 * preallocations or the per-AG reservations also forces a shutdown of the
 * filesystem before the error is returned.
 */
static int
xfs_remount_ro(
	struct xfs_mount	*mp)
{
	/* Request a synchronous inode cache walk for the blockgc scan below. */
	struct xfs_icwalk	icw = {
		.icw_flags	= XFS_ICWALK_FLAG_SYNC,
	};
	int			error;

	/* Flush all the dirty data to disk. */
	error = sync_filesystem(mp->m_super);
	if (error)
		return error;

	/*
	 * Cancel background eofb scanning so it cannot race with the final
	 * log force+buftarg wait and deadlock the remount.
	 */
	xfs_blockgc_stop(mp);

	/*
	 * Clear out all remaining COW staging extents and speculative post-EOF
	 * preallocations so that we don't leave inodes requiring inactivation
	 * cleanups during reclaim on a read-only mount.  We must process every
	 * cached inode, so this requires a synchronous cache scan.
	 */
	error = xfs_blockgc_free_space(mp, &icw);
	if (error) {
		xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
		return error;
	}

	/*
	 * Stop the inodegc background worker.  xfs_fs_reconfigure already
	 * flushed all pending inodegc work when it sync'd the filesystem.
	 * The VFS holds s_umount, so we know that inodes cannot enter
	 * xfs_fs_destroy_inode during a remount operation.  In readonly mode
	 * we send inodes straight to reclaim, so no inodes will be queued.
	 */
	xfs_inodegc_stop(mp);

	/* Free the per-AG metadata reservation pool. */
	error = xfs_fs_unreserve_ag_blocks(mp);
	if (error) {
		xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
		return error;
	}

	/*
	 * Before we sync the metadata, we need to free up the reserve block
	 * pool so that the used block count in the superblock on disk is
	 * correct at the end of the remount. Stash the current reserve pool
	 * size so that if we get remounted rw, we can return it to the same
	 * size.
	 */
	xfs_save_resvblks(mp);

	/* Clean the log, then mark the mount read-only as the last step. */
	xfs_log_clean(mp);
	set_bit(XFS_OPSTATE_READONLY, &mp->m_opstate);

	return 0;
}
   1825
   1826/*
   1827 * Logically we would return an error here to prevent users from believing
   1828 * they might have changed mount options using remount which can't be changed.
   1829 *
   1830 * But unfortunately mount(8) adds all options from mtab and fstab to the mount
   1831 * arguments in some cases so we can't blindly reject options, but have to
   1832 * check for each specified option if it actually differs from the currently
   1833 * set option and only reject it if that's the case.
   1834 *
   1835 * Until that is implemented we return success for every remount request, and
   1836 * silently ignore all options that we can't actually change.
   1837 */
   1838static int
   1839xfs_fs_reconfigure(
   1840	struct fs_context *fc)
   1841{
   1842	struct xfs_mount	*mp = XFS_M(fc->root->d_sb);
   1843	struct xfs_mount        *new_mp = fc->s_fs_info;
   1844	int			flags = fc->sb_flags;
   1845	int			error;
   1846
   1847	/* version 5 superblocks always support version counters. */
   1848	if (xfs_has_crc(mp))
   1849		fc->sb_flags |= SB_I_VERSION;
   1850
   1851	error = xfs_fs_validate_params(new_mp);
   1852	if (error)
   1853		return error;
   1854
   1855	/* inode32 -> inode64 */
   1856	if (xfs_has_small_inums(mp) && !xfs_has_small_inums(new_mp)) {
   1857		mp->m_features &= ~XFS_FEAT_SMALL_INUMS;
   1858		mp->m_maxagi = xfs_set_inode_alloc(mp, mp->m_sb.sb_agcount);
   1859	}
   1860
   1861	/* inode64 -> inode32 */
   1862	if (!xfs_has_small_inums(mp) && xfs_has_small_inums(new_mp)) {
   1863		mp->m_features |= XFS_FEAT_SMALL_INUMS;
   1864		mp->m_maxagi = xfs_set_inode_alloc(mp, mp->m_sb.sb_agcount);
   1865	}
   1866
   1867	/* ro -> rw */
   1868	if (xfs_is_readonly(mp) && !(flags & SB_RDONLY)) {
   1869		error = xfs_remount_rw(mp);
   1870		if (error)
   1871			return error;
   1872	}
   1873
   1874	/* rw -> ro */
   1875	if (!xfs_is_readonly(mp) && (flags & SB_RDONLY)) {
   1876		error = xfs_remount_ro(mp);
   1877		if (error)
   1878			return error;
   1879	}
   1880
   1881	return 0;
   1882}
   1883
   1884static void xfs_fs_free(
   1885	struct fs_context	*fc)
   1886{
   1887	struct xfs_mount	*mp = fc->s_fs_info;
   1888
   1889	/*
   1890	 * mp is stored in the fs_context when it is initialized.
   1891	 * mp is transferred to the superblock on a successful mount,
   1892	 * but if an error occurs before the transfer we have to free
   1893	 * it here.
   1894	 */
   1895	if (mp)
   1896		xfs_mount_free(mp);
   1897}
   1898
/*
 * fs_context operations; installed as fc->ops by xfs_init_fs_context().
 */
static const struct fs_context_operations xfs_context_ops = {
	.parse_param = xfs_fs_parse_param,	/* one mount option at a time */
	.get_tree    = xfs_fs_get_tree,		/* new mount */
	.reconfigure = xfs_fs_reconfigure,	/* remount */
	.free        = xfs_fs_free,		/* drop an unclaimed xfs_mount */
};
   1905
   1906static int xfs_init_fs_context(
   1907	struct fs_context	*fc)
   1908{
   1909	struct xfs_mount	*mp;
   1910
   1911	mp = kmem_alloc(sizeof(struct xfs_mount), KM_ZERO);
   1912	if (!mp)
   1913		return -ENOMEM;
   1914
   1915	spin_lock_init(&mp->m_sb_lock);
   1916	spin_lock_init(&mp->m_agirotor_lock);
   1917	INIT_RADIX_TREE(&mp->m_perag_tree, GFP_ATOMIC);
   1918	spin_lock_init(&mp->m_perag_lock);
   1919	mutex_init(&mp->m_growlock);
   1920	INIT_WORK(&mp->m_flush_inodes_work, xfs_flush_inodes_worker);
   1921	INIT_DELAYED_WORK(&mp->m_reclaim_work, xfs_reclaim_worker);
   1922	mp->m_kobj.kobject.kset = xfs_kset;
   1923	/*
   1924	 * We don't create the finobt per-ag space reservation until after log
   1925	 * recovery, so we must set this to true so that an ifree transaction
   1926	 * started during log recovery will not depend on space reservations
   1927	 * for finobt expansion.
   1928	 */
   1929	mp->m_finobt_nores = true;
   1930
   1931	/*
   1932	 * These can be overridden by the mount option parsing.
   1933	 */
   1934	mp->m_logbufs = -1;
   1935	mp->m_logbsize = -1;
   1936	mp->m_allocsize_log = 16; /* 64k */
   1937
   1938	/*
   1939	 * Copy binary VFS mount flags we are interested in.
   1940	 */
   1941	if (fc->sb_flags & SB_RDONLY)
   1942		set_bit(XFS_OPSTATE_READONLY, &mp->m_opstate);
   1943	if (fc->sb_flags & SB_DIRSYNC)
   1944		mp->m_features |= XFS_FEAT_DIRSYNC;
   1945	if (fc->sb_flags & SB_SYNCHRONOUS)
   1946		mp->m_features |= XFS_FEAT_WSYNC;
   1947
   1948	fc->s_fs_info = mp;
   1949	fc->ops = &xfs_context_ops;
   1950
   1951	return 0;
   1952}
   1953
   1954static struct file_system_type xfs_fs_type = {
   1955	.owner			= THIS_MODULE,
   1956	.name			= "xfs",
   1957	.init_fs_context	= xfs_init_fs_context,
   1958	.parameters		= xfs_fs_parameters,
   1959	.kill_sb		= kill_block_super,
   1960	.fs_flags		= FS_REQUIRES_DEV | FS_ALLOW_IDMAP,
   1961};
   1962MODULE_ALIAS_FS("xfs");
   1963
   1964STATIC int __init
   1965xfs_init_caches(void)
   1966{
   1967	int		error;
   1968
   1969	xfs_log_ticket_cache = kmem_cache_create("xfs_log_ticket",
   1970						sizeof(struct xlog_ticket),
   1971						0, 0, NULL);
   1972	if (!xfs_log_ticket_cache)
   1973		goto out;
   1974
   1975	error = xfs_btree_init_cur_caches();
   1976	if (error)
   1977		goto out_destroy_log_ticket_cache;
   1978
   1979	error = xfs_defer_init_item_caches();
   1980	if (error)
   1981		goto out_destroy_btree_cur_cache;
   1982
   1983	xfs_da_state_cache = kmem_cache_create("xfs_da_state",
   1984					      sizeof(struct xfs_da_state),
   1985					      0, 0, NULL);
   1986	if (!xfs_da_state_cache)
   1987		goto out_destroy_defer_item_cache;
   1988
   1989	xfs_ifork_cache = kmem_cache_create("xfs_ifork",
   1990					   sizeof(struct xfs_ifork),
   1991					   0, 0, NULL);
   1992	if (!xfs_ifork_cache)
   1993		goto out_destroy_da_state_cache;
   1994
   1995	xfs_trans_cache = kmem_cache_create("xfs_trans",
   1996					   sizeof(struct xfs_trans),
   1997					   0, 0, NULL);
   1998	if (!xfs_trans_cache)
   1999		goto out_destroy_ifork_cache;
   2000
   2001
   2002	/*
   2003	 * The size of the cache-allocated buf log item is the maximum
   2004	 * size possible under XFS.  This wastes a little bit of memory,
   2005	 * but it is much faster.
   2006	 */
   2007	xfs_buf_item_cache = kmem_cache_create("xfs_buf_item",
   2008					      sizeof(struct xfs_buf_log_item),
   2009					      0, 0, NULL);
   2010	if (!xfs_buf_item_cache)
   2011		goto out_destroy_trans_cache;
   2012
   2013	xfs_efd_cache = kmem_cache_create("xfs_efd_item",
   2014					(sizeof(struct xfs_efd_log_item) +
   2015					(XFS_EFD_MAX_FAST_EXTENTS - 1) *
   2016					sizeof(struct xfs_extent)),
   2017					0, 0, NULL);
   2018	if (!xfs_efd_cache)
   2019		goto out_destroy_buf_item_cache;
   2020
   2021	xfs_efi_cache = kmem_cache_create("xfs_efi_item",
   2022					 (sizeof(struct xfs_efi_log_item) +
   2023					 (XFS_EFI_MAX_FAST_EXTENTS - 1) *
   2024					 sizeof(struct xfs_extent)),
   2025					 0, 0, NULL);
   2026	if (!xfs_efi_cache)
   2027		goto out_destroy_efd_cache;
   2028
   2029	xfs_inode_cache = kmem_cache_create("xfs_inode",
   2030					   sizeof(struct xfs_inode), 0,
   2031					   (SLAB_HWCACHE_ALIGN |
   2032					    SLAB_RECLAIM_ACCOUNT |
   2033					    SLAB_MEM_SPREAD | SLAB_ACCOUNT),
   2034					   xfs_fs_inode_init_once);
   2035	if (!xfs_inode_cache)
   2036		goto out_destroy_efi_cache;
   2037
   2038	xfs_ili_cache = kmem_cache_create("xfs_ili",
   2039					 sizeof(struct xfs_inode_log_item), 0,
   2040					 SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD,
   2041					 NULL);
   2042	if (!xfs_ili_cache)
   2043		goto out_destroy_inode_cache;
   2044
   2045	xfs_icreate_cache = kmem_cache_create("xfs_icr",
   2046					     sizeof(struct xfs_icreate_item),
   2047					     0, 0, NULL);
   2048	if (!xfs_icreate_cache)
   2049		goto out_destroy_ili_cache;
   2050
   2051	xfs_rud_cache = kmem_cache_create("xfs_rud_item",
   2052					 sizeof(struct xfs_rud_log_item),
   2053					 0, 0, NULL);
   2054	if (!xfs_rud_cache)
   2055		goto out_destroy_icreate_cache;
   2056
   2057	xfs_rui_cache = kmem_cache_create("xfs_rui_item",
   2058			xfs_rui_log_item_sizeof(XFS_RUI_MAX_FAST_EXTENTS),
   2059			0, 0, NULL);
   2060	if (!xfs_rui_cache)
   2061		goto out_destroy_rud_cache;
   2062
   2063	xfs_cud_cache = kmem_cache_create("xfs_cud_item",
   2064					 sizeof(struct xfs_cud_log_item),
   2065					 0, 0, NULL);
   2066	if (!xfs_cud_cache)
   2067		goto out_destroy_rui_cache;
   2068
   2069	xfs_cui_cache = kmem_cache_create("xfs_cui_item",
   2070			xfs_cui_log_item_sizeof(XFS_CUI_MAX_FAST_EXTENTS),
   2071			0, 0, NULL);
   2072	if (!xfs_cui_cache)
   2073		goto out_destroy_cud_cache;
   2074
   2075	xfs_bud_cache = kmem_cache_create("xfs_bud_item",
   2076					 sizeof(struct xfs_bud_log_item),
   2077					 0, 0, NULL);
   2078	if (!xfs_bud_cache)
   2079		goto out_destroy_cui_cache;
   2080
   2081	xfs_bui_cache = kmem_cache_create("xfs_bui_item",
   2082			xfs_bui_log_item_sizeof(XFS_BUI_MAX_FAST_EXTENTS),
   2083			0, 0, NULL);
   2084	if (!xfs_bui_cache)
   2085		goto out_destroy_bud_cache;
   2086
   2087	xfs_attrd_cache = kmem_cache_create("xfs_attrd_item",
   2088					    sizeof(struct xfs_attrd_log_item),
   2089					    0, 0, NULL);
   2090	if (!xfs_attrd_cache)
   2091		goto out_destroy_bui_cache;
   2092
   2093	xfs_attri_cache = kmem_cache_create("xfs_attri_item",
   2094					    sizeof(struct xfs_attri_log_item),
   2095					    0, 0, NULL);
   2096	if (!xfs_attri_cache)
   2097		goto out_destroy_attrd_cache;
   2098
   2099	return 0;
   2100
   2101 out_destroy_attrd_cache:
   2102	kmem_cache_destroy(xfs_attrd_cache);
   2103 out_destroy_bui_cache:
   2104	kmem_cache_destroy(xfs_bui_cache);
   2105 out_destroy_bud_cache:
   2106	kmem_cache_destroy(xfs_bud_cache);
   2107 out_destroy_cui_cache:
   2108	kmem_cache_destroy(xfs_cui_cache);
   2109 out_destroy_cud_cache:
   2110	kmem_cache_destroy(xfs_cud_cache);
   2111 out_destroy_rui_cache:
   2112	kmem_cache_destroy(xfs_rui_cache);
   2113 out_destroy_rud_cache:
   2114	kmem_cache_destroy(xfs_rud_cache);
   2115 out_destroy_icreate_cache:
   2116	kmem_cache_destroy(xfs_icreate_cache);
   2117 out_destroy_ili_cache:
   2118	kmem_cache_destroy(xfs_ili_cache);
   2119 out_destroy_inode_cache:
   2120	kmem_cache_destroy(xfs_inode_cache);
   2121 out_destroy_efi_cache:
   2122	kmem_cache_destroy(xfs_efi_cache);
   2123 out_destroy_efd_cache:
   2124	kmem_cache_destroy(xfs_efd_cache);
   2125 out_destroy_buf_item_cache:
   2126	kmem_cache_destroy(xfs_buf_item_cache);
   2127 out_destroy_trans_cache:
   2128	kmem_cache_destroy(xfs_trans_cache);
   2129 out_destroy_ifork_cache:
   2130	kmem_cache_destroy(xfs_ifork_cache);
   2131 out_destroy_da_state_cache:
   2132	kmem_cache_destroy(xfs_da_state_cache);
   2133 out_destroy_defer_item_cache:
   2134	xfs_defer_destroy_item_caches();
   2135 out_destroy_btree_cur_cache:
   2136	xfs_btree_destroy_cur_caches();
   2137 out_destroy_log_ticket_cache:
   2138	kmem_cache_destroy(xfs_log_ticket_cache);
   2139 out:
   2140	return -ENOMEM;
   2141}
   2142
   2143STATIC void
   2144xfs_destroy_caches(void)
   2145{
   2146	/*
   2147	 * Make sure all delayed rcu free are flushed before we
   2148	 * destroy caches.
   2149	 */
   2150	rcu_barrier();
   2151	kmem_cache_destroy(xfs_attri_cache);
   2152	kmem_cache_destroy(xfs_attrd_cache);
   2153	kmem_cache_destroy(xfs_bui_cache);
   2154	kmem_cache_destroy(xfs_bud_cache);
   2155	kmem_cache_destroy(xfs_cui_cache);
   2156	kmem_cache_destroy(xfs_cud_cache);
   2157	kmem_cache_destroy(xfs_rui_cache);
   2158	kmem_cache_destroy(xfs_rud_cache);
   2159	kmem_cache_destroy(xfs_icreate_cache);
   2160	kmem_cache_destroy(xfs_ili_cache);
   2161	kmem_cache_destroy(xfs_inode_cache);
   2162	kmem_cache_destroy(xfs_efi_cache);
   2163	kmem_cache_destroy(xfs_efd_cache);
   2164	kmem_cache_destroy(xfs_buf_item_cache);
   2165	kmem_cache_destroy(xfs_trans_cache);
   2166	kmem_cache_destroy(xfs_ifork_cache);
   2167	kmem_cache_destroy(xfs_da_state_cache);
   2168	xfs_defer_destroy_item_caches();
   2169	xfs_btree_destroy_cur_caches();
   2170	kmem_cache_destroy(xfs_log_ticket_cache);
   2171}
   2172
   2173STATIC int __init
   2174xfs_init_workqueues(void)
   2175{
   2176	/*
   2177	 * The allocation workqueue can be used in memory reclaim situations
   2178	 * (writepage path), and parallelism is only limited by the number of
   2179	 * AGs in all the filesystems mounted. Hence use the default large
   2180	 * max_active value for this workqueue.
   2181	 */
   2182	xfs_alloc_wq = alloc_workqueue("xfsalloc",
   2183			XFS_WQFLAGS(WQ_MEM_RECLAIM | WQ_FREEZABLE), 0);
   2184	if (!xfs_alloc_wq)
   2185		return -ENOMEM;
   2186
   2187	xfs_discard_wq = alloc_workqueue("xfsdiscard", XFS_WQFLAGS(WQ_UNBOUND),
   2188			0);
   2189	if (!xfs_discard_wq)
   2190		goto out_free_alloc_wq;
   2191
   2192	return 0;
   2193out_free_alloc_wq:
   2194	destroy_workqueue(xfs_alloc_wq);
   2195	return -ENOMEM;
   2196}
   2197
   2198STATIC void
   2199xfs_destroy_workqueues(void)
   2200{
   2201	destroy_workqueue(xfs_discard_wq);
   2202	destroy_workqueue(xfs_alloc_wq);
   2203}
   2204
   2205#ifdef CONFIG_HOTPLUG_CPU
   2206static int
   2207xfs_cpu_dead(
   2208	unsigned int		cpu)
   2209{
   2210	struct xfs_mount	*mp, *n;
   2211
   2212	spin_lock(&xfs_mount_list_lock);
   2213	list_for_each_entry_safe(mp, n, &xfs_mount_list, m_mount_list) {
   2214		spin_unlock(&xfs_mount_list_lock);
   2215		xfs_inodegc_cpu_dead(mp, cpu);
   2216		spin_lock(&xfs_mount_list_lock);
   2217	}
   2218	spin_unlock(&xfs_mount_list_lock);
   2219	return 0;
   2220}
   2221
   2222static int __init
   2223xfs_cpu_hotplug_init(void)
   2224{
   2225	int	error;
   2226
   2227	error = cpuhp_setup_state_nocalls(CPUHP_XFS_DEAD, "xfs:dead", NULL,
   2228			xfs_cpu_dead);
   2229	if (error < 0)
   2230		xfs_alert(NULL,
   2231"Failed to initialise CPU hotplug, error %d. XFS is non-functional.",
   2232			error);
   2233	return error;
   2234}
   2235
   2236static void
   2237xfs_cpu_hotplug_destroy(void)
   2238{
   2239	cpuhp_remove_state_nocalls(CPUHP_XFS_DEAD);
   2240}
   2241
   2242#else /* !CONFIG_HOTPLUG_CPU */
   2243static inline int xfs_cpu_hotplug_init(void) { return 0; }
   2244static inline void xfs_cpu_hotplug_destroy(void) {}
   2245#endif
   2246
   2247STATIC int __init
   2248init_xfs_fs(void)
   2249{
   2250	int			error;
   2251
   2252	xfs_check_ondisk_structs();
   2253
   2254	printk(KERN_INFO XFS_VERSION_STRING " with "
   2255			 XFS_BUILD_OPTIONS " enabled\n");
   2256
   2257	xfs_dir_startup();
   2258
   2259	error = xfs_cpu_hotplug_init();
   2260	if (error)
   2261		goto out;
   2262
   2263	error = xfs_init_caches();
   2264	if (error)
   2265		goto out_destroy_hp;
   2266
   2267	error = xfs_init_workqueues();
   2268	if (error)
   2269		goto out_destroy_caches;
   2270
   2271	error = xfs_mru_cache_init();
   2272	if (error)
   2273		goto out_destroy_wq;
   2274
   2275	error = xfs_buf_init();
   2276	if (error)
   2277		goto out_mru_cache_uninit;
   2278
   2279	error = xfs_init_procfs();
   2280	if (error)
   2281		goto out_buf_terminate;
   2282
   2283	error = xfs_sysctl_register();
   2284	if (error)
   2285		goto out_cleanup_procfs;
   2286
   2287	xfs_kset = kset_create_and_add("xfs", NULL, fs_kobj);
   2288	if (!xfs_kset) {
   2289		error = -ENOMEM;
   2290		goto out_sysctl_unregister;
   2291	}
   2292
   2293	xfsstats.xs_kobj.kobject.kset = xfs_kset;
   2294
   2295	xfsstats.xs_stats = alloc_percpu(struct xfsstats);
   2296	if (!xfsstats.xs_stats) {
   2297		error = -ENOMEM;
   2298		goto out_kset_unregister;
   2299	}
   2300
   2301	error = xfs_sysfs_init(&xfsstats.xs_kobj, &xfs_stats_ktype, NULL,
   2302			       "stats");
   2303	if (error)
   2304		goto out_free_stats;
   2305
   2306#ifdef DEBUG
   2307	xfs_dbg_kobj.kobject.kset = xfs_kset;
   2308	error = xfs_sysfs_init(&xfs_dbg_kobj, &xfs_dbg_ktype, NULL, "debug");
   2309	if (error)
   2310		goto out_remove_stats_kobj;
   2311#endif
   2312
   2313	error = xfs_qm_init();
   2314	if (error)
   2315		goto out_remove_dbg_kobj;
   2316
   2317	error = register_filesystem(&xfs_fs_type);
   2318	if (error)
   2319		goto out_qm_exit;
   2320	return 0;
   2321
   2322 out_qm_exit:
   2323	xfs_qm_exit();
   2324 out_remove_dbg_kobj:
   2325#ifdef DEBUG
   2326	xfs_sysfs_del(&xfs_dbg_kobj);
   2327 out_remove_stats_kobj:
   2328#endif
   2329	xfs_sysfs_del(&xfsstats.xs_kobj);
   2330 out_free_stats:
   2331	free_percpu(xfsstats.xs_stats);
   2332 out_kset_unregister:
   2333	kset_unregister(xfs_kset);
   2334 out_sysctl_unregister:
   2335	xfs_sysctl_unregister();
   2336 out_cleanup_procfs:
   2337	xfs_cleanup_procfs();
   2338 out_buf_terminate:
   2339	xfs_buf_terminate();
   2340 out_mru_cache_uninit:
   2341	xfs_mru_cache_uninit();
   2342 out_destroy_wq:
   2343	xfs_destroy_workqueues();
   2344 out_destroy_caches:
   2345	xfs_destroy_caches();
   2346 out_destroy_hp:
   2347	xfs_cpu_hotplug_destroy();
   2348 out:
   2349	return error;
   2350}
   2351
   2352STATIC void __exit
   2353exit_xfs_fs(void)
   2354{
   2355	xfs_qm_exit();
   2356	unregister_filesystem(&xfs_fs_type);
   2357#ifdef DEBUG
   2358	xfs_sysfs_del(&xfs_dbg_kobj);
   2359#endif
   2360	xfs_sysfs_del(&xfsstats.xs_kobj);
   2361	free_percpu(xfsstats.xs_stats);
   2362	kset_unregister(xfs_kset);
   2363	xfs_sysctl_unregister();
   2364	xfs_cleanup_procfs();
   2365	xfs_buf_terminate();
   2366	xfs_mru_cache_uninit();
   2367	xfs_destroy_workqueues();
   2368	xfs_destroy_caches();
   2369	xfs_uuid_table_free();
   2370	xfs_cpu_hotplug_destroy();
   2371}
   2372
   2373module_init(init_xfs_fs);
   2374module_exit(exit_xfs_fs);
   2375
   2376MODULE_AUTHOR("Silicon Graphics, Inc.");
   2377MODULE_DESCRIPTION(XFS_VERSION_STRING " with " XFS_BUILD_OPTIONS " enabled");
   2378MODULE_LICENSE("GPL");