cachepc-linux

Fork of AMDESE/linux with modifications for the CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux

data.c (11802B)


// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2017-2018 HUAWEI, Inc.
 *             https://www.huawei.com/
 * Copyright (C) 2021, Alibaba Cloud
 */
#include "internal.h"
#include <linux/prefetch.h>
#include <linux/sched/mm.h>
#include <linux/dax.h>
#include <trace/events/erofs.h>

void erofs_unmap_metabuf(struct erofs_buf *buf)
{
	if (buf->kmap_type == EROFS_KMAP)
		kunmap(buf->page);
	else if (buf->kmap_type == EROFS_KMAP_ATOMIC)
		kunmap_atomic(buf->base);
	buf->base = NULL;
	buf->kmap_type = EROFS_NO_KMAP;
}

void erofs_put_metabuf(struct erofs_buf *buf)
{
	if (!buf->page)
		return;
	erofs_unmap_metabuf(buf);
	put_page(buf->page);
	buf->page = NULL;
}

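/*
 * Read the metadata block containing @blkaddr from @inode's page cache,
 * map it as requested by @type and return a pointer to the block data
 * (reusing the page already cached in @buf when possible).
 */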
void *erofs_bread(struct erofs_buf *buf, struct inode *inode,
		  erofs_blk_t blkaddr, enum erofs_kmap_type type)
{
	struct address_space *const mapping = inode->i_mapping;
	erofs_off_t offset = blknr_to_addr(blkaddr);
	pgoff_t index = offset >> PAGE_SHIFT;
	struct page *page = buf->page;
	struct folio *folio;
	unsigned int nofs_flag;

	if (!page || page->index != index) {
		erofs_put_metabuf(buf);

		nofs_flag = memalloc_nofs_save();
		folio = read_cache_folio(mapping, index, NULL, NULL);
		memalloc_nofs_restore(nofs_flag);
		if (IS_ERR(folio))
			return folio;

		/* should already be PageUptodate, no need to lock page */
		page = folio_file_page(folio, index);
		buf->page = page;
	}
	if (buf->kmap_type == EROFS_NO_KMAP) {
		if (type == EROFS_KMAP)
			buf->base = kmap(page);
		else if (type == EROFS_KMAP_ATOMIC)
			buf->base = kmap_atomic(page);
		buf->kmap_type = type;
	} else if (buf->kmap_type != type) {
		DBG_BUGON(1);
		return ERR_PTR(-EFAULT);
	}
	if (type == EROFS_NO_KMAP)
		return NULL;
	return buf->base + (offset & ~PAGE_MASK);
}

void *erofs_read_metabuf(struct erofs_buf *buf, struct super_block *sb,
			 erofs_blk_t blkaddr, enum erofs_kmap_type type)
{
	if (erofs_is_fscache_mode(sb))
		return erofs_bread(buf, EROFS_SB(sb)->s_fscache->inode,
				   blkaddr, type);

	return erofs_bread(buf, sb->s_bdev->bd_inode, blkaddr, type);
}

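/*
 * Map a logical offset for flat-layout (non-chunk-based) inodes, including
 * the optional tail-packed inline block stored next to the on-disk inode.
 */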
static int erofs_map_blocks_flatmode(struct inode *inode,
				     struct erofs_map_blocks *map,
				     int flags)
{
	erofs_blk_t nblocks, lastblk;
	u64 offset = map->m_la;
	struct erofs_inode *vi = EROFS_I(inode);
	bool tailendpacking = (vi->datalayout == EROFS_INODE_FLAT_INLINE);

	nblocks = DIV_ROUND_UP(inode->i_size, EROFS_BLKSIZ);
	lastblk = nblocks - tailendpacking;

	/* there is no hole in flatmode */
	map->m_flags = EROFS_MAP_MAPPED;
	if (offset < blknr_to_addr(lastblk)) {
		map->m_pa = blknr_to_addr(vi->raw_blkaddr) + map->m_la;
		map->m_plen = blknr_to_addr(lastblk) - offset;
	} else if (tailendpacking) {
		/* 2 - inode inline B: inode, [xattrs], inline last blk... */
		struct erofs_sb_info *sbi = EROFS_SB(inode->i_sb);

		map->m_pa = iloc(sbi, vi->nid) + vi->inode_isize +
			vi->xattr_isize + erofs_blkoff(map->m_la);
		map->m_plen = inode->i_size - offset;

		/* inline data should be located in the same meta block */
		if (erofs_blkoff(map->m_pa) + map->m_plen > EROFS_BLKSIZ) {
			erofs_err(inode->i_sb,
				  "inline data cross block boundary @ nid %llu",
				  vi->nid);
			DBG_BUGON(1);
			return -EFSCORRUPTED;
		}
		map->m_flags |= EROFS_MAP_META;
	} else {
		erofs_err(inode->i_sb,
			  "internal error @ nid: %llu (size %llu), m_la 0x%llx",
			  vi->nid, inode->i_size, map->m_la);
		DBG_BUGON(1);
		return -EIO;
	}
	return 0;
}

int erofs_map_blocks(struct inode *inode,
		     struct erofs_map_blocks *map, int flags)
{
	struct super_block *sb = inode->i_sb;
	struct erofs_inode *vi = EROFS_I(inode);
	struct erofs_inode_chunk_index *idx;
	struct erofs_buf buf = __EROFS_BUF_INITIALIZER;
	u64 chunknr;
	unsigned int unit;
	erofs_off_t pos;
	void *kaddr;
	int err = 0;

	trace_erofs_map_blocks_enter(inode, map, flags);
	map->m_deviceid = 0;
	if (map->m_la >= inode->i_size) {
		/* leave out-of-bounds access unmapped */
		map->m_flags = 0;
		map->m_plen = 0;
		goto out;
	}

	if (vi->datalayout != EROFS_INODE_CHUNK_BASED) {
		err = erofs_map_blocks_flatmode(inode, map, flags);
		goto out;
	}

	if (vi->chunkformat & EROFS_CHUNK_FORMAT_INDEXES)
		unit = sizeof(*idx);			/* chunk index */
	else
		unit = EROFS_BLOCK_MAP_ENTRY_SIZE;	/* block map */

	chunknr = map->m_la >> vi->chunkbits;
	pos = ALIGN(iloc(EROFS_SB(sb), vi->nid) + vi->inode_isize +
		    vi->xattr_isize, unit) + unit * chunknr;

	kaddr = erofs_read_metabuf(&buf, sb, erofs_blknr(pos), EROFS_KMAP);
	if (IS_ERR(kaddr)) {
		err = PTR_ERR(kaddr);
		goto out;
	}
	map->m_la = chunknr << vi->chunkbits;
	map->m_plen = min_t(erofs_off_t, 1UL << vi->chunkbits,
			    roundup(inode->i_size - map->m_la, EROFS_BLKSIZ));

	/* handle block map */
	if (!(vi->chunkformat & EROFS_CHUNK_FORMAT_INDEXES)) {
		__le32 *blkaddr = kaddr + erofs_blkoff(pos);

		if (le32_to_cpu(*blkaddr) == EROFS_NULL_ADDR) {
			map->m_flags = 0;
		} else {
			map->m_pa = blknr_to_addr(le32_to_cpu(*blkaddr));
			map->m_flags = EROFS_MAP_MAPPED;
		}
		goto out_unlock;
	}
	/* parse chunk indexes */
	idx = kaddr + erofs_blkoff(pos);
	switch (le32_to_cpu(idx->blkaddr)) {
	case EROFS_NULL_ADDR:
		map->m_flags = 0;
		break;
	default:
		map->m_deviceid = le16_to_cpu(idx->device_id) &
			EROFS_SB(sb)->device_id_mask;
		map->m_pa = blknr_to_addr(le32_to_cpu(idx->blkaddr));
		map->m_flags = EROFS_MAP_MAPPED;
		break;
	}
out_unlock:
	erofs_put_metabuf(&buf);
out:
	if (!err)
		map->m_llen = map->m_plen;
	trace_erofs_map_blocks_exit(inode, map, flags, 0);
	return err;
}

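/*
 * Resolve the physical address in @map to its backing device: the primary
 * block device by default, or one of the extra devices registered in the
 * device table for multi-device images.
 */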
int erofs_map_dev(struct super_block *sb, struct erofs_map_dev *map)
{
	struct erofs_dev_context *devs = EROFS_SB(sb)->devs;
	struct erofs_device_info *dif;
	int id;

	/* primary device by default */
	map->m_bdev = sb->s_bdev;
	map->m_daxdev = EROFS_SB(sb)->dax_dev;
	map->m_dax_part_off = EROFS_SB(sb)->dax_part_off;
	map->m_fscache = EROFS_SB(sb)->s_fscache;

	if (map->m_deviceid) {
		down_read(&devs->rwsem);
		dif = idr_find(&devs->tree, map->m_deviceid - 1);
		if (!dif) {
			up_read(&devs->rwsem);
			return -ENODEV;
		}
		map->m_bdev = dif->bdev;
		map->m_daxdev = dif->dax_dev;
		map->m_dax_part_off = dif->dax_part_off;
		map->m_fscache = dif->fscache;
		up_read(&devs->rwsem);
	} else if (devs->extra_devices) {
		down_read(&devs->rwsem);
		idr_for_each_entry(&devs->tree, dif, id) {
			erofs_off_t startoff, length;

			if (!dif->mapped_blkaddr)
				continue;
			startoff = blknr_to_addr(dif->mapped_blkaddr);
			length = blknr_to_addr(dif->blocks);

			if (map->m_pa >= startoff &&
			    map->m_pa < startoff + length) {
				map->m_pa -= startoff;
				map->m_bdev = dif->bdev;
				map->m_daxdev = dif->dax_dev;
				map->m_dax_part_off = dif->dax_part_off;
				map->m_fscache = dif->fscache;
				break;
			}
		}
		up_read(&devs->rwsem);
	}
	return 0;
}

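/*
 * iomap_begin callback: translate [offset, offset + length) into an iomap
 * extent, reporting holes, tail-packed inline data and mapped extents.
 */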
static int erofs_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
		unsigned int flags, struct iomap *iomap, struct iomap *srcmap)
{
	int ret;
	struct erofs_map_blocks map;
	struct erofs_map_dev mdev;

	map.m_la = offset;
	map.m_llen = length;

	ret = erofs_map_blocks(inode, &map, EROFS_GET_BLOCKS_RAW);
	if (ret < 0)
		return ret;

	mdev = (struct erofs_map_dev) {
		.m_deviceid = map.m_deviceid,
		.m_pa = map.m_pa,
	};
	ret = erofs_map_dev(inode->i_sb, &mdev);
	if (ret)
		return ret;

	iomap->offset = map.m_la;
	if (flags & IOMAP_DAX)
		iomap->dax_dev = mdev.m_daxdev;
	else
		iomap->bdev = mdev.m_bdev;
	iomap->length = map.m_llen;
	iomap->flags = 0;
	iomap->private = NULL;

	if (!(map.m_flags & EROFS_MAP_MAPPED)) {
		iomap->type = IOMAP_HOLE;
		iomap->addr = IOMAP_NULL_ADDR;
		if (!iomap->length)
			iomap->length = length;
		return 0;
	}

	if (map.m_flags & EROFS_MAP_META) {
		void *ptr;
		struct erofs_buf buf = __EROFS_BUF_INITIALIZER;

		iomap->type = IOMAP_INLINE;
		ptr = erofs_read_metabuf(&buf, inode->i_sb,
					 erofs_blknr(mdev.m_pa), EROFS_KMAP);
		if (IS_ERR(ptr))
			return PTR_ERR(ptr);
		iomap->inline_data = ptr + erofs_blkoff(mdev.m_pa);
		iomap->private = buf.base;
	} else {
		iomap->type = IOMAP_MAPPED;
		iomap->addr = mdev.m_pa;
		if (flags & IOMAP_DAX)
			iomap->addr += mdev.m_dax_part_off;
	}
	return 0;
}

static int erofs_iomap_end(struct inode *inode, loff_t pos, loff_t length,
		ssize_t written, unsigned int flags, struct iomap *iomap)
{
	void *ptr = iomap->private;

	if (ptr) {
		struct erofs_buf buf = {
			.page = kmap_to_page(ptr),
			.base = ptr,
			.kmap_type = EROFS_KMAP,
		};

		DBG_BUGON(iomap->type != IOMAP_INLINE);
		erofs_put_metabuf(&buf);
	} else {
		DBG_BUGON(iomap->type == IOMAP_INLINE);
	}
	return written;
}

static const struct iomap_ops erofs_iomap_ops = {
	.iomap_begin = erofs_iomap_begin,
	.iomap_end = erofs_iomap_end,
};

int erofs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
		 u64 start, u64 len)
{
	if (erofs_inode_is_data_compressed(EROFS_I(inode)->datalayout)) {
#ifdef CONFIG_EROFS_FS_ZIP
		return iomap_fiemap(inode, fieinfo, start, len,
				    &z_erofs_iomap_report_ops);
#else
		return -EOPNOTSUPP;
#endif
	}
	return iomap_fiemap(inode, fieinfo, start, len, &erofs_iomap_ops);
}

/*
 * Since there are no write or truncate flows, no inode
 * locking needs to be held at the moment.
 */
static int erofs_read_folio(struct file *file, struct folio *folio)
{
	return iomap_read_folio(folio, &erofs_iomap_ops);
}

static void erofs_readahead(struct readahead_control *rac)
{
	return iomap_readahead(rac, &erofs_iomap_ops);
}

static sector_t erofs_bmap(struct address_space *mapping, sector_t block)
{
	return iomap_bmap(mapping, block, &erofs_iomap_ops);
}

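/*
 * Direct I/O is only allowed when position, length and buffer alignment
 * all match the logical block size of the backing device.
 */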
static int erofs_prepare_dio(struct kiocb *iocb, struct iov_iter *to)
{
	struct inode *inode = file_inode(iocb->ki_filp);
	loff_t align = iocb->ki_pos | iov_iter_count(to) |
		iov_iter_alignment(to);
	struct block_device *bdev = inode->i_sb->s_bdev;
	unsigned int blksize_mask;

	if (bdev)
		blksize_mask = (1 << ilog2(bdev_logical_block_size(bdev))) - 1;
	else
		blksize_mask = (1 << inode->i_blkbits) - 1;

	if (align & blksize_mask)
		return -EINVAL;
	return 0;
}

static ssize_t erofs_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
{
	/* no need to take the (shared) inode lock since it's a ro filesystem */
	if (!iov_iter_count(to))
		return 0;

#ifdef CONFIG_FS_DAX
	if (IS_DAX(iocb->ki_filp->f_mapping->host))
		return dax_iomap_rw(iocb, to, &erofs_iomap_ops);
#endif
	if (iocb->ki_flags & IOCB_DIRECT) {
		int err = erofs_prepare_dio(iocb, to);

		if (!err)
			return iomap_dio_rw(iocb, to, &erofs_iomap_ops,
					    NULL, 0, NULL, 0);
		if (err < 0)
			return err;
	}
	return filemap_read(iocb, to, 0);
}

/* for uncompressed (aligned) files and raw access for other files */
const struct address_space_operations erofs_raw_access_aops = {
	.read_folio = erofs_read_folio,
	.readahead = erofs_readahead,
	.bmap = erofs_bmap,
	.direct_IO = noop_direct_IO,
};

#ifdef CONFIG_FS_DAX
static vm_fault_t erofs_dax_huge_fault(struct vm_fault *vmf,
		enum page_entry_size pe_size)
{
	return dax_iomap_fault(vmf, pe_size, NULL, NULL, &erofs_iomap_ops);
}

static vm_fault_t erofs_dax_fault(struct vm_fault *vmf)
{
	return erofs_dax_huge_fault(vmf, PE_SIZE_PTE);
}

static const struct vm_operations_struct erofs_dax_vm_ops = {
	.fault		= erofs_dax_fault,
	.huge_fault	= erofs_dax_huge_fault,
};

static int erofs_file_mmap(struct file *file, struct vm_area_struct *vma)
{
	if (!IS_DAX(file_inode(file)))
		return generic_file_readonly_mmap(file, vma);

	if ((vma->vm_flags & VM_SHARED) && (vma->vm_flags & VM_MAYWRITE))
		return -EINVAL;

	vma->vm_ops = &erofs_dax_vm_ops;
	vma->vm_flags |= VM_HUGEPAGE;
	return 0;
}
#else
#define erofs_file_mmap	generic_file_readonly_mmap
#endif

const struct file_operations erofs_file_fops = {
	.llseek		= generic_file_llseek,
	.read_iter	= erofs_file_read_iter,
	.mmap		= erofs_file_mmap,
	.splice_read	= generic_file_splice_read,
};