super.c - cachepc-linux - Fork of AMDESE/linux with modifications for CachePC side-channel attack

	cachepc-linux Fork of AMDESE/linux with modifications for CachePC side-channel attack
	git clone https://git.sinitax.com/sinitax/cachepc-linux
	Log \| Files \| Refs \| README \| LICENSE \| sfeed.txt
super.c (25745B)
      1// SPDX-License-Identifier: GPL-2.0-only
      2/*
      3 * Copyright (C) 2017-2018 HUAWEI, Inc.
      4 *             https://www.huawei.com/
      5 * Copyright (C) 2021, Alibaba Cloud
      6 */
      7#include <linux/module.h>
      8#include <linux/buffer_head.h>
      9#include <linux/statfs.h>
     10#include <linux/parser.h>
     11#include <linux/seq_file.h>
     12#include <linux/crc32c.h>
     13#include <linux/fs_context.h>
     14#include <linux/fs_parser.h>
     15#include <linux/dax.h>
     16#include <linux/exportfs.h>
     17#include "xattr.h"
     18
     19#define CREATE_TRACE_POINTS
     20#include <trace/events/erofs.h>
     21
/*
 * Slab cache backing every struct erofs_inode; created in
 * erofs_module_init() and destroyed in erofs_module_exit().
 */
static struct kmem_cache *erofs_inode_cachep __read_mostly;
     23
     24void _erofs_err(struct super_block *sb, const char *function,
     25		const char *fmt, ...)
     26{
     27	struct va_format vaf;
     28	va_list args;
     29
     30	va_start(args, fmt);
     31
     32	vaf.fmt = fmt;
     33	vaf.va = &args;
     34
     35	pr_err("(device %s): %s: %pV", sb->s_id, function, &vaf);
     36	va_end(args);
     37}
     38
     39void _erofs_info(struct super_block *sb, const char *function,
     40		 const char *fmt, ...)
     41{
     42	struct va_format vaf;
     43	va_list args;
     44
     45	va_start(args, fmt);
     46
     47	vaf.fmt = fmt;
     48	vaf.va = &args;
     49
     50	pr_info("(device %s): %pV", sb->s_id, &vaf);
     51	va_end(args);
     52}
     53
     54static int erofs_superblock_csum_verify(struct super_block *sb, void *sbdata)
     55{
     56	struct erofs_super_block *dsb;
     57	u32 expected_crc, crc;
     58
     59	dsb = kmemdup(sbdata + EROFS_SUPER_OFFSET,
     60		      EROFS_BLKSIZ - EROFS_SUPER_OFFSET, GFP_KERNEL);
     61	if (!dsb)
     62		return -ENOMEM;
     63
     64	expected_crc = le32_to_cpu(dsb->checksum);
     65	dsb->checksum = 0;
     66	/* to allow for x86 boot sectors and other oddities. */
     67	crc = crc32c(~0, dsb, EROFS_BLKSIZ - EROFS_SUPER_OFFSET);
     68	kfree(dsb);
     69
     70	if (crc != expected_crc) {
     71		erofs_err(sb, "invalid checksum 0x%08x, 0x%08x expected",
     72			  crc, expected_crc);
     73		return -EBADMSG;
     74	}
     75	return 0;
     76}
     77
     78static void erofs_inode_init_once(void *ptr)
     79{
     80	struct erofs_inode *vi = ptr;
     81
     82	inode_init_once(&vi->vfs_inode);
     83}
     84
     85static struct inode *erofs_alloc_inode(struct super_block *sb)
     86{
     87	struct erofs_inode *vi =
     88		alloc_inode_sb(sb, erofs_inode_cachep, GFP_KERNEL);
     89
     90	if (!vi)
     91		return NULL;
     92
     93	/* zero out everything except vfs_inode */
     94	memset(vi, 0, offsetof(struct erofs_inode, vfs_inode));
     95	return &vi->vfs_inode;
     96}
     97
     98static void erofs_free_inode(struct inode *inode)
     99{
    100	struct erofs_inode *vi = EROFS_I(inode);
    101
    102	/* be careful of RCU symlink path */
    103	if (inode->i_op == &erofs_fast_symlink_iops)
    104		kfree(inode->i_link);
    105	kfree(vi->xattr_shared_xattrs);
    106
    107	kmem_cache_free(erofs_inode_cachep, vi);
    108}
    109
    110static bool check_layout_compatibility(struct super_block *sb,
    111				       struct erofs_super_block *dsb)
    112{
    113	const unsigned int feature = le32_to_cpu(dsb->feature_incompat);
    114
    115	EROFS_SB(sb)->feature_incompat = feature;
    116
    117	/* check if current kernel meets all mandatory requirements */
    118	if (feature & (~EROFS_ALL_FEATURE_INCOMPAT)) {
    119		erofs_err(sb,
    120			  "unidentified incompatible feature %x, please upgrade kernel version",
    121			   feature & ~EROFS_ALL_FEATURE_INCOMPAT);
    122		return false;
    123	}
    124	return true;
    125}
    126
    127#ifdef CONFIG_EROFS_FS_ZIP
    128/* read variable-sized metadata, offset will be aligned by 4-byte */
    129static void *erofs_read_metadata(struct super_block *sb, struct erofs_buf *buf,
    130				 erofs_off_t *offset, int *lengthp)
    131{
    132	u8 *buffer, *ptr;
    133	int len, i, cnt;
    134
    135	*offset = round_up(*offset, 4);
    136	ptr = erofs_read_metabuf(buf, sb, erofs_blknr(*offset), EROFS_KMAP);
    137	if (IS_ERR(ptr))
    138		return ptr;
    139
    140	len = le16_to_cpu(*(__le16 *)&ptr[erofs_blkoff(*offset)]);
    141	if (!len)
    142		len = U16_MAX + 1;
    143	buffer = kmalloc(len, GFP_KERNEL);
    144	if (!buffer)
    145		return ERR_PTR(-ENOMEM);
    146	*offset += sizeof(__le16);
    147	*lengthp = len;
    148
    149	for (i = 0; i < len; i += cnt) {
    150		cnt = min(EROFS_BLKSIZ - (int)erofs_blkoff(*offset), len - i);
    151		ptr = erofs_read_metabuf(buf, sb, erofs_blknr(*offset),
    152					 EROFS_KMAP);
    153		if (IS_ERR(ptr)) {
    154			kfree(buffer);
    155			return ptr;
    156		}
    157		memcpy(buffer + i, ptr + erofs_blkoff(*offset), cnt);
    158		*offset += cnt;
    159	}
    160	return buffer;
    161}
    162
    163static int erofs_load_compr_cfgs(struct super_block *sb,
    164				 struct erofs_super_block *dsb)
    165{
    166	struct erofs_sb_info *sbi = EROFS_SB(sb);
    167	struct erofs_buf buf = __EROFS_BUF_INITIALIZER;
    168	unsigned int algs, alg;
    169	erofs_off_t offset;
    170	int size, ret = 0;
    171
    172	sbi->available_compr_algs = le16_to_cpu(dsb->u1.available_compr_algs);
    173	if (sbi->available_compr_algs & ~Z_EROFS_ALL_COMPR_ALGS) {
    174		erofs_err(sb, "try to load compressed fs with unsupported algorithms %x",
    175			  sbi->available_compr_algs & ~Z_EROFS_ALL_COMPR_ALGS);
    176		return -EINVAL;
    177	}
    178
    179	offset = EROFS_SUPER_OFFSET + sbi->sb_size;
    180	alg = 0;
    181	for (algs = sbi->available_compr_algs; algs; algs >>= 1, ++alg) {
    182		void *data;
    183
    184		if (!(algs & 1))
    185			continue;
    186
    187		data = erofs_read_metadata(sb, &buf, &offset, &size);
    188		if (IS_ERR(data)) {
    189			ret = PTR_ERR(data);
    190			break;
    191		}
    192
    193		switch (alg) {
    194		case Z_EROFS_COMPRESSION_LZ4:
    195			ret = z_erofs_load_lz4_config(sb, dsb, data, size);
    196			break;
    197		case Z_EROFS_COMPRESSION_LZMA:
    198			ret = z_erofs_load_lzma_config(sb, dsb, data, size);
    199			break;
    200		default:
    201			DBG_BUGON(1);
    202			ret = -EFAULT;
    203		}
    204		kfree(data);
    205		if (ret)
    206			break;
    207	}
    208	erofs_put_metabuf(&buf);
    209	return ret;
    210}
    211#else
    212static int erofs_load_compr_cfgs(struct super_block *sb,
    213				 struct erofs_super_block *dsb)
    214{
    215	if (dsb->u1.available_compr_algs) {
    216		erofs_err(sb, "try to load compressed fs when compression is disabled");
    217		return -EINVAL;
    218	}
    219	return 0;
    220}
    221#endif
    222
    223static int erofs_init_device(struct erofs_buf *buf, struct super_block *sb,
    224			     struct erofs_device_info *dif, erofs_off_t *pos)
    225{
    226	struct erofs_sb_info *sbi = EROFS_SB(sb);
    227	struct erofs_deviceslot *dis;
    228	struct block_device *bdev;
    229	void *ptr;
    230	int ret;
    231
    232	ptr = erofs_read_metabuf(buf, sb, erofs_blknr(*pos), EROFS_KMAP);
    233	if (IS_ERR(ptr))
    234		return PTR_ERR(ptr);
    235	dis = ptr + erofs_blkoff(*pos);
    236
    237	if (!dif->path) {
    238		if (!dis->tag[0]) {
    239			erofs_err(sb, "empty device tag @ pos %llu", *pos);
    240			return -EINVAL;
    241		}
    242		dif->path = kmemdup_nul(dis->tag, sizeof(dis->tag), GFP_KERNEL);
    243		if (!dif->path)
    244			return -ENOMEM;
    245	}
    246
    247	if (erofs_is_fscache_mode(sb)) {
    248		ret = erofs_fscache_register_cookie(sb, &dif->fscache,
    249				dif->path, false);
    250		if (ret)
    251			return ret;
    252	} else {
    253		bdev = blkdev_get_by_path(dif->path, FMODE_READ | FMODE_EXCL,
    254					  sb->s_type);
    255		if (IS_ERR(bdev))
    256			return PTR_ERR(bdev);
    257		dif->bdev = bdev;
    258		dif->dax_dev = fs_dax_get_by_bdev(bdev, &dif->dax_part_off);
    259	}
    260
    261	dif->blocks = le32_to_cpu(dis->blocks);
    262	dif->mapped_blkaddr = le32_to_cpu(dis->mapped_blkaddr);
    263	sbi->total_blocks += dif->blocks;
    264	*pos += EROFS_DEVT_SLOT_SIZE;
    265	return 0;
    266}
    267
    268static int erofs_scan_devices(struct super_block *sb,
    269			      struct erofs_super_block *dsb)
    270{
    271	struct erofs_sb_info *sbi = EROFS_SB(sb);
    272	unsigned int ondisk_extradevs;
    273	erofs_off_t pos;
    274	struct erofs_buf buf = __EROFS_BUF_INITIALIZER;
    275	struct erofs_device_info *dif;
    276	int id, err = 0;
    277
    278	sbi->total_blocks = sbi->primarydevice_blocks;
    279	if (!erofs_sb_has_device_table(sbi))
    280		ondisk_extradevs = 0;
    281	else
    282		ondisk_extradevs = le16_to_cpu(dsb->extra_devices);
    283
    284	if (sbi->devs->extra_devices &&
    285	    ondisk_extradevs != sbi->devs->extra_devices) {
    286		erofs_err(sb, "extra devices don't match (ondisk %u, given %u)",
    287			  ondisk_extradevs, sbi->devs->extra_devices);
    288		return -EINVAL;
    289	}
    290	if (!ondisk_extradevs)
    291		return 0;
    292
    293	sbi->device_id_mask = roundup_pow_of_two(ondisk_extradevs + 1) - 1;
    294	pos = le16_to_cpu(dsb->devt_slotoff) * EROFS_DEVT_SLOT_SIZE;
    295	down_read(&sbi->devs->rwsem);
    296	if (sbi->devs->extra_devices) {
    297		idr_for_each_entry(&sbi->devs->tree, dif, id) {
    298			err = erofs_init_device(&buf, sb, dif, &pos);
    299			if (err)
    300				break;
    301		}
    302	} else {
    303		for (id = 0; id < ondisk_extradevs; id++) {
    304			dif = kzalloc(sizeof(*dif), GFP_KERNEL);
    305			if (!dif) {
    306				err = -ENOMEM;
    307				break;
    308			}
    309
    310			err = idr_alloc(&sbi->devs->tree, dif, 0, 0, GFP_KERNEL);
    311			if (err < 0) {
    312				kfree(dif);
    313				break;
    314			}
    315			++sbi->devs->extra_devices;
    316
    317			err = erofs_init_device(&buf, sb, dif, &pos);
    318			if (err)
    319				break;
    320		}
    321	}
    322	up_read(&sbi->devs->rwsem);
    323	erofs_put_metabuf(&buf);
    324	return err;
    325}
    326
    327static int erofs_read_superblock(struct super_block *sb)
    328{
    329	struct erofs_sb_info *sbi;
    330	struct erofs_buf buf = __EROFS_BUF_INITIALIZER;
    331	struct erofs_super_block *dsb;
    332	unsigned int blkszbits;
    333	void *data;
    334	int ret;
    335
    336	data = erofs_read_metabuf(&buf, sb, 0, EROFS_KMAP);
    337	if (IS_ERR(data)) {
    338		erofs_err(sb, "cannot read erofs superblock");
    339		return PTR_ERR(data);
    340	}
    341
    342	sbi = EROFS_SB(sb);
    343	dsb = (struct erofs_super_block *)(data + EROFS_SUPER_OFFSET);
    344
    345	ret = -EINVAL;
    346	if (le32_to_cpu(dsb->magic) != EROFS_SUPER_MAGIC_V1) {
    347		erofs_err(sb, "cannot find valid erofs superblock");
    348		goto out;
    349	}
    350
    351	sbi->feature_compat = le32_to_cpu(dsb->feature_compat);
    352	if (erofs_sb_has_sb_chksum(sbi)) {
    353		ret = erofs_superblock_csum_verify(sb, data);
    354		if (ret)
    355			goto out;
    356	}
    357
    358	ret = -EINVAL;
    359	blkszbits = dsb->blkszbits;
    360	/* 9(512 bytes) + LOG_SECTORS_PER_BLOCK == LOG_BLOCK_SIZE */
    361	if (blkszbits != LOG_BLOCK_SIZE) {
    362		erofs_err(sb, "blkszbits %u isn't supported on this platform",
    363			  blkszbits);
    364		goto out;
    365	}
    366
    367	if (!check_layout_compatibility(sb, dsb))
    368		goto out;
    369
    370	sbi->sb_size = 128 + dsb->sb_extslots * EROFS_SB_EXTSLOT_SIZE;
    371	if (sbi->sb_size > EROFS_BLKSIZ) {
    372		erofs_err(sb, "invalid sb_extslots %u (more than a fs block)",
    373			  sbi->sb_size);
    374		goto out;
    375	}
    376	sbi->primarydevice_blocks = le32_to_cpu(dsb->blocks);
    377	sbi->meta_blkaddr = le32_to_cpu(dsb->meta_blkaddr);
    378#ifdef CONFIG_EROFS_FS_XATTR
    379	sbi->xattr_blkaddr = le32_to_cpu(dsb->xattr_blkaddr);
    380#endif
    381	sbi->islotbits = ilog2(sizeof(struct erofs_inode_compact));
    382	sbi->root_nid = le16_to_cpu(dsb->root_nid);
    383	sbi->inos = le64_to_cpu(dsb->inos);
    384
    385	sbi->build_time = le64_to_cpu(dsb->build_time);
    386	sbi->build_time_nsec = le32_to_cpu(dsb->build_time_nsec);
    387
    388	memcpy(&sb->s_uuid, dsb->uuid, sizeof(dsb->uuid));
    389
    390	ret = strscpy(sbi->volume_name, dsb->volume_name,
    391		      sizeof(dsb->volume_name));
    392	if (ret < 0) {	/* -E2BIG */
    393		erofs_err(sb, "bad volume name without NIL terminator");
    394		ret = -EFSCORRUPTED;
    395		goto out;
    396	}
    397
    398	/* parse on-disk compression configurations */
    399	if (erofs_sb_has_compr_cfgs(sbi))
    400		ret = erofs_load_compr_cfgs(sb, dsb);
    401	else
    402		ret = z_erofs_load_lz4_config(sb, dsb, NULL, 0);
    403	if (ret < 0)
    404		goto out;
    405
    406	/* handle multiple devices */
    407	ret = erofs_scan_devices(sb, dsb);
    408
    409	if (erofs_sb_has_ztailpacking(sbi))
    410		erofs_info(sb, "EXPERIMENTAL compressed inline data feature in use. Use at your own risk!");
    411	if (erofs_is_fscache_mode(sb))
    412		erofs_info(sb, "EXPERIMENTAL fscache-based on-demand read feature in use. Use at your own risk!");
    413out:
    414	erofs_put_metabuf(&buf);
    415	return ret;
    416}
    417
    418/* set up default EROFS parameters */
    419static void erofs_default_options(struct erofs_fs_context *ctx)
    420{
    421#ifdef CONFIG_EROFS_FS_ZIP
    422	ctx->opt.cache_strategy = EROFS_ZIP_CACHE_READAROUND;
    423	ctx->opt.max_sync_decompress_pages = 3;
    424	ctx->opt.sync_decompress = EROFS_SYNC_DECOMPRESS_AUTO;
    425#endif
    426#ifdef CONFIG_EROFS_FS_XATTR
    427	set_opt(&ctx->opt, XATTR_USER);
    428#endif
    429#ifdef CONFIG_EROFS_FS_POSIX_ACL
    430	set_opt(&ctx->opt, POSIX_ACL);
    431#endif
    432}
    433
/*
 * Mount option identifiers.  Each value is bound to an option name in
 * erofs_fs_parameters[] and dispatched on in erofs_fc_parse_param().
 */
enum {
	Opt_user_xattr,		/* "user_xattr" / "nouser_xattr" */
	Opt_acl,		/* "acl" / "noacl" */
	Opt_cache_strategy,	/* "cache_strategy=<name>" */
	Opt_dax,		/* bare "dax" flag */
	Opt_dax_enum,		/* "dax=<mode>" */
	Opt_device,		/* "device=<path>" */
	Opt_fsid,		/* "fsid=<string>" */
	Opt_err			/* not referenced by the code visible in this file */
};
    444
/* Accepted values of the "cache_strategy=" option and their constants. */
static const struct constant_table erofs_param_cache_strategy[] = {
	{"disabled",	EROFS_ZIP_CACHE_DISABLED},
	{"readahead",	EROFS_ZIP_CACHE_READAHEAD},
	{"readaround",	EROFS_ZIP_CACHE_READAROUND},
	{}	/* terminating empty entry */
};
    451
/* Accepted values of the "dax=" option and their constants. */
static const struct constant_table erofs_dax_param_enums[] = {
	{"always",	EROFS_MOUNT_DAX_ALWAYS},
	{"never",	EROFS_MOUNT_DAX_NEVER},
	{}	/* terminating empty entry */
};
    457
/*
 * Mount option specification passed to fs_parse() by
 * erofs_fc_parse_param().  "dax" is listed twice: once as a bare flag and
 * once as an enum taking one of erofs_dax_param_enums[].
 */
static const struct fs_parameter_spec erofs_fs_parameters[] = {
	fsparam_flag_no("user_xattr",	Opt_user_xattr),
	fsparam_flag_no("acl",		Opt_acl),
	fsparam_enum("cache_strategy",	Opt_cache_strategy,
		     erofs_param_cache_strategy),
	fsparam_flag("dax",             Opt_dax),
	fsparam_enum("dax",		Opt_dax_enum, erofs_dax_param_enums),
	fsparam_string("device",	Opt_device),
	fsparam_string("fsid",		Opt_fsid),
	{}	/* terminating empty entry */
};
    469
    470static bool erofs_fc_set_dax_mode(struct fs_context *fc, unsigned int mode)
    471{
    472#ifdef CONFIG_FS_DAX
    473	struct erofs_fs_context *ctx = fc->fs_private;
    474
    475	switch (mode) {
    476	case EROFS_MOUNT_DAX_ALWAYS:
    477		warnfc(fc, "DAX enabled. Warning: EXPERIMENTAL, use at your own risk");
    478		set_opt(&ctx->opt, DAX_ALWAYS);
    479		clear_opt(&ctx->opt, DAX_NEVER);
    480		return true;
    481	case EROFS_MOUNT_DAX_NEVER:
    482		set_opt(&ctx->opt, DAX_NEVER);
    483		clear_opt(&ctx->opt, DAX_ALWAYS);
    484		return true;
    485	default:
    486		DBG_BUGON(1);
    487		return false;
    488	}
    489#else
    490	errorfc(fc, "dax options not supported");
    491	return false;
    492#endif
    493}
    494
    495static int erofs_fc_parse_param(struct fs_context *fc,
    496				struct fs_parameter *param)
    497{
    498	struct erofs_fs_context *ctx = fc->fs_private;
    499	struct fs_parse_result result;
    500	struct erofs_device_info *dif;
    501	int opt, ret;
    502
    503	opt = fs_parse(fc, erofs_fs_parameters, param, &result);
    504	if (opt < 0)
    505		return opt;
    506
    507	switch (opt) {
    508	case Opt_user_xattr:
    509#ifdef CONFIG_EROFS_FS_XATTR
    510		if (result.boolean)
    511			set_opt(&ctx->opt, XATTR_USER);
    512		else
    513			clear_opt(&ctx->opt, XATTR_USER);
    514#else
    515		errorfc(fc, "{,no}user_xattr options not supported");
    516#endif
    517		break;
    518	case Opt_acl:
    519#ifdef CONFIG_EROFS_FS_POSIX_ACL
    520		if (result.boolean)
    521			set_opt(&ctx->opt, POSIX_ACL);
    522		else
    523			clear_opt(&ctx->opt, POSIX_ACL);
    524#else
    525		errorfc(fc, "{,no}acl options not supported");
    526#endif
    527		break;
    528	case Opt_cache_strategy:
    529#ifdef CONFIG_EROFS_FS_ZIP
    530		ctx->opt.cache_strategy = result.uint_32;
    531#else
    532		errorfc(fc, "compression not supported, cache_strategy ignored");
    533#endif
    534		break;
    535	case Opt_dax:
    536		if (!erofs_fc_set_dax_mode(fc, EROFS_MOUNT_DAX_ALWAYS))
    537			return -EINVAL;
    538		break;
    539	case Opt_dax_enum:
    540		if (!erofs_fc_set_dax_mode(fc, result.uint_32))
    541			return -EINVAL;
    542		break;
    543	case Opt_device:
    544		dif = kzalloc(sizeof(*dif), GFP_KERNEL);
    545		if (!dif)
    546			return -ENOMEM;
    547		dif->path = kstrdup(param->string, GFP_KERNEL);
    548		if (!dif->path) {
    549			kfree(dif);
    550			return -ENOMEM;
    551		}
    552		down_write(&ctx->devs->rwsem);
    553		ret = idr_alloc(&ctx->devs->tree, dif, 0, 0, GFP_KERNEL);
    554		up_write(&ctx->devs->rwsem);
    555		if (ret < 0) {
    556			kfree(dif->path);
    557			kfree(dif);
    558			return ret;
    559		}
    560		++ctx->devs->extra_devices;
    561		break;
    562	case Opt_fsid:
    563#ifdef CONFIG_EROFS_FS_ONDEMAND
    564		kfree(ctx->opt.fsid);
    565		ctx->opt.fsid = kstrdup(param->string, GFP_KERNEL);
    566		if (!ctx->opt.fsid)
    567			return -ENOMEM;
    568#else
    569		errorfc(fc, "fsid option not supported");
    570#endif
    571		break;
    572	default:
    573		return -ENOPARAM;
    574	}
    575	return 0;
    576}
    577
    578#ifdef CONFIG_EROFS_FS_ZIP
    579static const struct address_space_operations managed_cache_aops;
    580
    581static bool erofs_managed_cache_release_folio(struct folio *folio, gfp_t gfp)
    582{
    583	bool ret = true;
    584	struct address_space *const mapping = folio->mapping;
    585
    586	DBG_BUGON(!folio_test_locked(folio));
    587	DBG_BUGON(mapping->a_ops != &managed_cache_aops);
    588
    589	if (folio_test_private(folio))
    590		ret = erofs_try_to_free_cached_page(&folio->page);
    591
    592	return ret;
    593}
    594
    595/*
    596 * It will be called only on inode eviction. In case that there are still some
    597 * decompression requests in progress, wait with rescheduling for a bit here.
    598 * We could introduce an extra locking instead but it seems unnecessary.
    599 */
    600static void erofs_managed_cache_invalidate_folio(struct folio *folio,
    601					       size_t offset, size_t length)
    602{
    603	const size_t stop = length + offset;
    604
    605	DBG_BUGON(!folio_test_locked(folio));
    606
    607	/* Check for potential overflow in debug mode */
    608	DBG_BUGON(stop > folio_size(folio) || stop < length);
    609
    610	if (offset == 0 && stop == folio_size(folio))
    611		while (!erofs_managed_cache_release_folio(folio, GFP_NOFS))
    612			cond_resched();
    613}
    614
/*
 * Address space operations installed on the managed cache inode by
 * erofs_init_managed_cache().
 */
static const struct address_space_operations managed_cache_aops = {
	.release_folio = erofs_managed_cache_release_folio,
	.invalidate_folio = erofs_managed_cache_invalidate_folio,
};
    619
    620static int erofs_init_managed_cache(struct super_block *sb)
    621{
    622	struct erofs_sb_info *const sbi = EROFS_SB(sb);
    623	struct inode *const inode = new_inode(sb);
    624
    625	if (!inode)
    626		return -ENOMEM;
    627
    628	set_nlink(inode, 1);
    629	inode->i_size = OFFSET_MAX;
    630
    631	inode->i_mapping->a_ops = &managed_cache_aops;
    632	mapping_set_gfp_mask(inode->i_mapping, GFP_NOFS);
    633	sbi->managed_cache = inode;
    634	return 0;
    635}
    636#else
    637static int erofs_init_managed_cache(struct super_block *sb) { return 0; }
    638#endif
    639
    640static struct inode *erofs_nfs_get_inode(struct super_block *sb,
    641					 u64 ino, u32 generation)
    642{
    643	return erofs_iget(sb, ino, false);
    644}
    645
    646static struct dentry *erofs_fh_to_dentry(struct super_block *sb,
    647		struct fid *fid, int fh_len, int fh_type)
    648{
    649	return generic_fh_to_dentry(sb, fid, fh_len, fh_type,
    650				    erofs_nfs_get_inode);
    651}
    652
    653static struct dentry *erofs_fh_to_parent(struct super_block *sb,
    654		struct fid *fid, int fh_len, int fh_type)
    655{
    656	return generic_fh_to_parent(sb, fid, fh_len, fh_type,
    657				    erofs_nfs_get_inode);
    658}
    659
    660static struct dentry *erofs_get_parent(struct dentry *child)
    661{
    662	erofs_nid_t nid;
    663	unsigned int d_type;
    664	int err;
    665
    666	err = erofs_namei(d_inode(child), &dotdot_name, &nid, &d_type);
    667	if (err)
    668		return ERR_PTR(err);
    669	return d_obtain_alias(erofs_iget(child->d_sb, nid, d_type == FT_DIR));
    670}
    671
/* Export operations installed as sb->s_export_op in erofs_fc_fill_super(). */
static const struct export_operations erofs_export_ops = {
	.fh_to_dentry = erofs_fh_to_dentry,
	.fh_to_parent = erofs_fh_to_parent,
	.get_parent = erofs_get_parent,
};
    677
    678static int erofs_fc_fill_super(struct super_block *sb, struct fs_context *fc)
    679{
    680	struct inode *inode;
    681	struct erofs_sb_info *sbi;
    682	struct erofs_fs_context *ctx = fc->fs_private;
    683	int err;
    684
    685	sb->s_magic = EROFS_SUPER_MAGIC;
    686	sb->s_flags |= SB_RDONLY | SB_NOATIME;
    687	sb->s_maxbytes = MAX_LFS_FILESIZE;
    688	sb->s_op = &erofs_sops;
    689
    690	sbi = kzalloc(sizeof(*sbi), GFP_KERNEL);
    691	if (!sbi)
    692		return -ENOMEM;
    693
    694	sb->s_fs_info = sbi;
    695	sbi->opt = ctx->opt;
    696	ctx->opt.fsid = NULL;
    697	sbi->devs = ctx->devs;
    698	ctx->devs = NULL;
    699
    700	if (erofs_is_fscache_mode(sb)) {
    701		sb->s_blocksize = EROFS_BLKSIZ;
    702		sb->s_blocksize_bits = LOG_BLOCK_SIZE;
    703
    704		err = erofs_fscache_register_fs(sb);
    705		if (err)
    706			return err;
    707
    708		err = erofs_fscache_register_cookie(sb, &sbi->s_fscache,
    709						    sbi->opt.fsid, true);
    710		if (err)
    711			return err;
    712
    713		err = super_setup_bdi(sb);
    714		if (err)
    715			return err;
    716	} else {
    717		if (!sb_set_blocksize(sb, EROFS_BLKSIZ)) {
    718			erofs_err(sb, "failed to set erofs blksize");
    719			return -EINVAL;
    720		}
    721
    722		sbi->dax_dev = fs_dax_get_by_bdev(sb->s_bdev,
    723						  &sbi->dax_part_off);
    724	}
    725
    726	err = erofs_read_superblock(sb);
    727	if (err)
    728		return err;
    729
    730	if (test_opt(&sbi->opt, DAX_ALWAYS)) {
    731		BUILD_BUG_ON(EROFS_BLKSIZ != PAGE_SIZE);
    732
    733		if (!sbi->dax_dev) {
    734			errorfc(fc, "DAX unsupported by block device. Turning off DAX.");
    735			clear_opt(&sbi->opt, DAX_ALWAYS);
    736		}
    737	}
    738
    739	sb->s_time_gran = 1;
    740	sb->s_xattr = erofs_xattr_handlers;
    741	sb->s_export_op = &erofs_export_ops;
    742
    743	if (test_opt(&sbi->opt, POSIX_ACL))
    744		sb->s_flags |= SB_POSIXACL;
    745	else
    746		sb->s_flags &= ~SB_POSIXACL;
    747
    748#ifdef CONFIG_EROFS_FS_ZIP
    749	xa_init(&sbi->managed_pslots);
    750#endif
    751
    752	/* get the root inode */
    753	inode = erofs_iget(sb, ROOT_NID(sbi), true);
    754	if (IS_ERR(inode))
    755		return PTR_ERR(inode);
    756
    757	if (!S_ISDIR(inode->i_mode)) {
    758		erofs_err(sb, "rootino(nid %llu) is not a directory(i_mode %o)",
    759			  ROOT_NID(sbi), inode->i_mode);
    760		iput(inode);
    761		return -EINVAL;
    762	}
    763
    764	sb->s_root = d_make_root(inode);
    765	if (!sb->s_root)
    766		return -ENOMEM;
    767
    768	erofs_shrinker_register(sb);
    769	/* sb->s_umount is already locked, SB_ACTIVE and SB_BORN are not set */
    770	err = erofs_init_managed_cache(sb);
    771	if (err)
    772		return err;
    773
    774	err = erofs_register_sysfs(sb);
    775	if (err)
    776		return err;
    777
    778	erofs_info(sb, "mounted with root inode @ nid %llu.", ROOT_NID(sbi));
    779	return 0;
    780}
    781
    782static int erofs_fc_get_tree(struct fs_context *fc)
    783{
    784	struct erofs_fs_context *ctx = fc->fs_private;
    785
    786	if (IS_ENABLED(CONFIG_EROFS_FS_ONDEMAND) && ctx->opt.fsid)
    787		return get_tree_nodev(fc, erofs_fc_fill_super);
    788
    789	return get_tree_bdev(fc, erofs_fc_fill_super);
    790}
    791
    792static int erofs_fc_reconfigure(struct fs_context *fc)
    793{
    794	struct super_block *sb = fc->root->d_sb;
    795	struct erofs_sb_info *sbi = EROFS_SB(sb);
    796	struct erofs_fs_context *ctx = fc->fs_private;
    797
    798	DBG_BUGON(!sb_rdonly(sb));
    799
    800	if (test_opt(&ctx->opt, POSIX_ACL))
    801		fc->sb_flags |= SB_POSIXACL;
    802	else
    803		fc->sb_flags &= ~SB_POSIXACL;
    804
    805	sbi->opt = ctx->opt;
    806
    807	fc->sb_flags |= SB_RDONLY;
    808	return 0;
    809}
    810
    811static int erofs_release_device_info(int id, void *ptr, void *data)
    812{
    813	struct erofs_device_info *dif = ptr;
    814
    815	fs_put_dax(dif->dax_dev);
    816	if (dif->bdev)
    817		blkdev_put(dif->bdev, FMODE_READ | FMODE_EXCL);
    818	erofs_fscache_unregister_cookie(&dif->fscache);
    819	kfree(dif->path);
    820	kfree(dif);
    821	return 0;
    822}
    823
    824static void erofs_free_dev_context(struct erofs_dev_context *devs)
    825{
    826	if (!devs)
    827		return;
    828	idr_for_each(&devs->tree, &erofs_release_device_info, NULL);
    829	idr_destroy(&devs->tree);
    830	kfree(devs);
    831}
    832
    833static void erofs_fc_free(struct fs_context *fc)
    834{
    835	struct erofs_fs_context *ctx = fc->fs_private;
    836
    837	erofs_free_dev_context(ctx->devs);
    838	kfree(ctx->opt.fsid);
    839	kfree(ctx);
    840}
    841
/* fs_context operations installed by erofs_init_fs_context(). */
static const struct fs_context_operations erofs_context_ops = {
	.parse_param	= erofs_fc_parse_param,
	.get_tree       = erofs_fc_get_tree,
	.reconfigure    = erofs_fc_reconfigure,
	.free		= erofs_fc_free,
};
    848
    849static int erofs_init_fs_context(struct fs_context *fc)
    850{
    851	struct erofs_fs_context *ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
    852
    853	if (!ctx)
    854		return -ENOMEM;
    855	ctx->devs = kzalloc(sizeof(struct erofs_dev_context), GFP_KERNEL);
    856	if (!ctx->devs) {
    857		kfree(ctx);
    858		return -ENOMEM;
    859	}
    860	fc->fs_private = ctx;
    861
    862	idr_init(&ctx->devs->tree);
    863	init_rwsem(&ctx->devs->rwsem);
    864	erofs_default_options(ctx);
    865	fc->ops = &erofs_context_ops;
    866	return 0;
    867}
    868
    869/*
    870 * could be triggered after deactivate_locked_super()
    871 * is called, thus including umount and failed to initialize.
    872 */
    873static void erofs_kill_sb(struct super_block *sb)
    874{
    875	struct erofs_sb_info *sbi;
    876
    877	WARN_ON(sb->s_magic != EROFS_SUPER_MAGIC);
    878
    879	if (erofs_is_fscache_mode(sb))
    880		generic_shutdown_super(sb);
    881	else
    882		kill_block_super(sb);
    883
    884	sbi = EROFS_SB(sb);
    885	if (!sbi)
    886		return;
    887
    888	erofs_free_dev_context(sbi->devs);
    889	fs_put_dax(sbi->dax_dev);
    890	erofs_fscache_unregister_cookie(&sbi->s_fscache);
    891	erofs_fscache_unregister_fs(sb);
    892	kfree(sbi->opt.fsid);
    893	kfree(sbi);
    894	sb->s_fs_info = NULL;
    895}
    896
    897/* called when ->s_root is non-NULL */
    898static void erofs_put_super(struct super_block *sb)
    899{
    900	struct erofs_sb_info *const sbi = EROFS_SB(sb);
    901
    902	DBG_BUGON(!sbi);
    903
    904	erofs_unregister_sysfs(sb);
    905	erofs_shrinker_unregister(sb);
    906#ifdef CONFIG_EROFS_FS_ZIP
    907	iput(sbi->managed_cache);
    908	sbi->managed_cache = NULL;
    909#endif
    910	erofs_fscache_unregister_cookie(&sbi->s_fscache);
    911}
    912
/*
 * Filesystem type registered by erofs_module_init() and unregistered by
 * erofs_module_exit().
 */
static struct file_system_type erofs_fs_type = {
	.owner          = THIS_MODULE,
	.name           = "erofs",
	.init_fs_context = erofs_init_fs_context,
	.kill_sb        = erofs_kill_sb,
	.fs_flags       = FS_REQUIRES_DEV | FS_ALLOW_IDMAP,
};
MODULE_ALIAS_FS("erofs");
    921
    922static int __init erofs_module_init(void)
    923{
    924	int err;
    925
    926	erofs_check_ondisk_layout_definitions();
    927
    928	erofs_inode_cachep = kmem_cache_create("erofs_inode",
    929					       sizeof(struct erofs_inode), 0,
    930					       SLAB_RECLAIM_ACCOUNT,
    931					       erofs_inode_init_once);
    932	if (!erofs_inode_cachep) {
    933		err = -ENOMEM;
    934		goto icache_err;
    935	}
    936
    937	err = erofs_init_shrinker();
    938	if (err)
    939		goto shrinker_err;
    940
    941	err = z_erofs_lzma_init();
    942	if (err)
    943		goto lzma_err;
    944
    945	erofs_pcpubuf_init();
    946	err = z_erofs_init_zip_subsystem();
    947	if (err)
    948		goto zip_err;
    949
    950	err = erofs_init_sysfs();
    951	if (err)
    952		goto sysfs_err;
    953
    954	err = register_filesystem(&erofs_fs_type);
    955	if (err)
    956		goto fs_err;
    957
    958	return 0;
    959
    960fs_err:
    961	erofs_exit_sysfs();
    962sysfs_err:
    963	z_erofs_exit_zip_subsystem();
    964zip_err:
    965	z_erofs_lzma_exit();
    966lzma_err:
    967	erofs_exit_shrinker();
    968shrinker_err:
    969	kmem_cache_destroy(erofs_inode_cachep);
    970icache_err:
    971	return err;
    972}
    973
/*
 * Module exit: unregister the filesystem type first, then tear the
 * subsystems down in the reverse order of erofs_module_init().
 */
static void __exit erofs_module_exit(void)
{
	unregister_filesystem(&erofs_fs_type);

	/* Ensure all RCU free inodes / pclusters are safe to be destroyed. */
	rcu_barrier();

	erofs_exit_sysfs();
	z_erofs_exit_zip_subsystem();
	z_erofs_lzma_exit();
	erofs_exit_shrinker();
	kmem_cache_destroy(erofs_inode_cachep);
	erofs_pcpubuf_exit();
}
    988
    989/* get filesystem statistics */
    990static int erofs_statfs(struct dentry *dentry, struct kstatfs *buf)
    991{
    992	struct super_block *sb = dentry->d_sb;
    993	struct erofs_sb_info *sbi = EROFS_SB(sb);
    994	u64 id = 0;
    995
    996	if (!erofs_is_fscache_mode(sb))
    997		id = huge_encode_dev(sb->s_bdev->bd_dev);
    998
    999	buf->f_type = sb->s_magic;
   1000	buf->f_bsize = EROFS_BLKSIZ;
   1001	buf->f_blocks = sbi->total_blocks;
   1002	buf->f_bfree = buf->f_bavail = 0;
   1003
   1004	buf->f_files = ULLONG_MAX;
   1005	buf->f_ffree = ULLONG_MAX - sbi->inos;
   1006
   1007	buf->f_namelen = EROFS_NAME_LEN;
   1008
   1009	buf->f_fsid    = u64_to_fsid(id);
   1010	return 0;
   1011}
   1012
   1013static int erofs_show_options(struct seq_file *seq, struct dentry *root)
   1014{
   1015	struct erofs_sb_info *sbi = EROFS_SB(root->d_sb);
   1016	struct erofs_mount_opts *opt = &sbi->opt;
   1017
   1018#ifdef CONFIG_EROFS_FS_XATTR
   1019	if (test_opt(opt, XATTR_USER))
   1020		seq_puts(seq, ",user_xattr");
   1021	else
   1022		seq_puts(seq, ",nouser_xattr");
   1023#endif
   1024#ifdef CONFIG_EROFS_FS_POSIX_ACL
   1025	if (test_opt(opt, POSIX_ACL))
   1026		seq_puts(seq, ",acl");
   1027	else
   1028		seq_puts(seq, ",noacl");
   1029#endif
   1030#ifdef CONFIG_EROFS_FS_ZIP
   1031	if (opt->cache_strategy == EROFS_ZIP_CACHE_DISABLED)
   1032		seq_puts(seq, ",cache_strategy=disabled");
   1033	else if (opt->cache_strategy == EROFS_ZIP_CACHE_READAHEAD)
   1034		seq_puts(seq, ",cache_strategy=readahead");
   1035	else if (opt->cache_strategy == EROFS_ZIP_CACHE_READAROUND)
   1036		seq_puts(seq, ",cache_strategy=readaround");
   1037#endif
   1038	if (test_opt(opt, DAX_ALWAYS))
   1039		seq_puts(seq, ",dax=always");
   1040	if (test_opt(opt, DAX_NEVER))
   1041		seq_puts(seq, ",dax=never");
   1042#ifdef CONFIG_EROFS_FS_ONDEMAND
   1043	if (opt->fsid)
   1044		seq_printf(seq, ",fsid=%s", opt->fsid);
   1045#endif
   1046	return 0;
   1047}
   1048
/* Superblock operations installed as sb->s_op in erofs_fc_fill_super(). */
const struct super_operations erofs_sops = {
	.put_super = erofs_put_super,
	.alloc_inode = erofs_alloc_inode,
	.free_inode = erofs_free_inode,
	.statfs = erofs_statfs,
	.show_options = erofs_show_options,
};
   1056
/* Module entry/exit hooks and module metadata. */
module_init(erofs_module_init);
module_exit(erofs_module_exit);

MODULE_DESCRIPTION("Enhanced ROM File System");
MODULE_AUTHOR("Gao Xiang, Chao Yu, Miao Xie, CONSUMER BG, HUAWEI Inc.");
MODULE_LICENSE("GPL");