readpage.c - cachepc-linux - Fork of AMDESE/linux with modifications for CachePC side-channel attack

	cachepc-linux Fork of AMDESE/linux with modifications for CachePC side-channel attack
	git clone https://git.sinitax.com/sinitax/cachepc-linux
	Log \| Files \| Refs \| README \| LICENSE \| sfeed.txt
readpage.c (11328B)
      1// SPDX-License-Identifier: GPL-2.0
      2/*
      3 * linux/fs/ext4/readpage.c
      4 *
      5 * Copyright (C) 2002, Linus Torvalds.
      6 * Copyright (C) 2015, Google, Inc.
      7 *
      8 * This was originally taken from fs/mpage.c
      9 *
     10 * The ext4_mpage_readpages() function here is intended to
     11 * replace mpage_readahead() in the general case, not just for
     12 * encrypted files.  It has some limitations (see below), where it
     13 * will fall back to block_read_full_folio(), but these limitations
     14 * should only be hit when page_size != block_size.
     15 *
     16 * This will allow us to attach a callback function to support ext4
     17 * encryption.
     18 *
     19 * If anything unusual happens, such as:
     20 *
     21 * - encountering a page which has buffers
     22 * - encountering a page which has a non-hole after a hole
     23 * - encountering a page with non-contiguous blocks
     24 *
     25 * then this code just gives up and calls the buffer_head-based read function.
     26 * It does handle a page which has holes at the end - that is a common case:
     27 * the end-of-file on blocksize < PAGE_SIZE setups.
     28 *
     29 */
     30
     31#include <linux/kernel.h>
     32#include <linux/export.h>
     33#include <linux/mm.h>
     34#include <linux/kdev_t.h>
     35#include <linux/gfp.h>
     36#include <linux/bio.h>
     37#include <linux/fs.h>
     38#include <linux/buffer_head.h>
     39#include <linux/blkdev.h>
     40#include <linux/highmem.h>
     41#include <linux/prefetch.h>
     42#include <linux/mpage.h>
     43#include <linux/writeback.h>
     44#include <linux/backing-dev.h>
     45#include <linux/pagevec.h>
     46
     47#include "ext4.h"
     48
/*
 * Element count handed to mempool_create_slab_pool() when the post-read
 * context pool is created in ext4_init_post_read_processing().
 */
#define NUM_PREALLOC_POST_READ_CTXS	128

/* Slab cache sized for struct bio_post_read_ctx objects. */
static struct kmem_cache *bio_post_read_ctx_cache;
/* Mempool built on the slab cache above; read bios get their ctx from it. */
static mempool_t *bio_post_read_ctx_pool;
     53
/*
 * Postprocessing steps for read bios.
 *
 * The enumerator values are stored in bio_post_read_ctx.cur_step and are also
 * used as bit positions ("1 << STEP_*") in bio_post_read_ctx.enabled_steps.
 */
enum bio_post_read_step {
	STEP_INITIAL = 0,	/* value mpage_end_io() starts from */
	STEP_DECRYPT,		/* run by decrypt_work() */
	STEP_VERITY,		/* run by verity_work(); asserted to be last */
	STEP_MAX,		/* one past the last real step */
};
     61
/*
 * Per-bio post-read state.  Allocated from bio_post_read_ctx_pool and hung
 * off bio->bi_private by ext4_set_bio_post_read_ctx().
 */
struct bio_post_read_ctx {
	struct bio *bio;		/* the read bio this context is attached to */
	struct work_struct work;	/* work item queued for the current step */
	unsigned int cur_step;		/* enum bio_post_read_step reached so far */
	unsigned int enabled_steps;	/* bitmask of (1 << STEP_*) to execute */
};
     68
     69static void __read_end_io(struct bio *bio)
     70{
     71	struct page *page;
     72	struct bio_vec *bv;
     73	struct bvec_iter_all iter_all;
     74
     75	bio_for_each_segment_all(bv, bio, iter_all) {
     76		page = bv->bv_page;
     77
     78		/* PG_error was set if any post_read step failed */
     79		if (bio->bi_status || PageError(page)) {
     80			ClearPageUptodate(page);
     81			/* will re-read again later */
     82			ClearPageError(page);
     83		} else {
     84			SetPageUptodate(page);
     85		}
     86		unlock_page(page);
     87	}
     88	if (bio->bi_private)
     89		mempool_free(bio->bi_private, bio_post_read_ctx_pool);
     90	bio_put(bio);
     91}
     92
     93static void bio_post_read_processing(struct bio_post_read_ctx *ctx);
     94
     95static void decrypt_work(struct work_struct *work)
     96{
     97	struct bio_post_read_ctx *ctx =
     98		container_of(work, struct bio_post_read_ctx, work);
     99
    100	fscrypt_decrypt_bio(ctx->bio);
    101
    102	bio_post_read_processing(ctx);
    103}
    104
    105static void verity_work(struct work_struct *work)
    106{
    107	struct bio_post_read_ctx *ctx =
    108		container_of(work, struct bio_post_read_ctx, work);
    109	struct bio *bio = ctx->bio;
    110
    111	/*
    112	 * fsverity_verify_bio() may call readahead() again, and although verity
    113	 * will be disabled for that, decryption may still be needed, causing
    114	 * another bio_post_read_ctx to be allocated.  So to guarantee that
    115	 * mempool_alloc() never deadlocks we must free the current ctx first.
    116	 * This is safe because verity is the last post-read step.
    117	 */
    118	BUILD_BUG_ON(STEP_VERITY + 1 != STEP_MAX);
    119	mempool_free(ctx, bio_post_read_ctx_pool);
    120	bio->bi_private = NULL;
    121
    122	fsverity_verify_bio(bio);
    123
    124	__read_end_io(bio);
    125}
    126
    127static void bio_post_read_processing(struct bio_post_read_ctx *ctx)
    128{
    129	/*
    130	 * We use different work queues for decryption and for verity because
    131	 * verity may require reading metadata pages that need decryption, and
    132	 * we shouldn't recurse to the same workqueue.
    133	 */
    134	switch (++ctx->cur_step) {
    135	case STEP_DECRYPT:
    136		if (ctx->enabled_steps & (1 << STEP_DECRYPT)) {
    137			INIT_WORK(&ctx->work, decrypt_work);
    138			fscrypt_enqueue_decrypt_work(&ctx->work);
    139			return;
    140		}
    141		ctx->cur_step++;
    142		fallthrough;
    143	case STEP_VERITY:
    144		if (ctx->enabled_steps & (1 << STEP_VERITY)) {
    145			INIT_WORK(&ctx->work, verity_work);
    146			fsverity_enqueue_verify_work(&ctx->work);
    147			return;
    148		}
    149		ctx->cur_step++;
    150		fallthrough;
    151	default:
    152		__read_end_io(ctx->bio);
    153	}
    154}
    155
    156static bool bio_post_read_required(struct bio *bio)
    157{
    158	return bio->bi_private && !bio->bi_status;
    159}
    160
    161/*
    162 * I/O completion handler for multipage BIOs.
    163 *
    164 * The mpage code never puts partial pages into a BIO (except for end-of-file).
    165 * If a page does not map to a contiguous run of blocks then it simply falls
    166 * back to block_read_full_folio().
    167 *
    168 * Why is this?  If a page's completion depends on a number of different BIOs
    169 * which can complete in any order (or at the same time) then determining the
    170 * status of that page is hard.  See end_buffer_async_read() for the details.
    171 * There is no point in duplicating all that complexity.
    172 */
    173static void mpage_end_io(struct bio *bio)
    174{
    175	if (bio_post_read_required(bio)) {
    176		struct bio_post_read_ctx *ctx = bio->bi_private;
    177
    178		ctx->cur_step = STEP_INITIAL;
    179		bio_post_read_processing(ctx);
    180		return;
    181	}
    182	__read_end_io(bio);
    183}
    184
    185static inline bool ext4_need_verity(const struct inode *inode, pgoff_t idx)
    186{
    187	return fsverity_active(inode) &&
    188	       idx < DIV_ROUND_UP(inode->i_size, PAGE_SIZE);
    189}
    190
    191static void ext4_set_bio_post_read_ctx(struct bio *bio,
    192				       const struct inode *inode,
    193				       pgoff_t first_idx)
    194{
    195	unsigned int post_read_steps = 0;
    196
    197	if (fscrypt_inode_uses_fs_layer_crypto(inode))
    198		post_read_steps |= 1 << STEP_DECRYPT;
    199
    200	if (ext4_need_verity(inode, first_idx))
    201		post_read_steps |= 1 << STEP_VERITY;
    202
    203	if (post_read_steps) {
    204		/* Due to the mempool, this never fails. */
    205		struct bio_post_read_ctx *ctx =
    206			mempool_alloc(bio_post_read_ctx_pool, GFP_NOFS);
    207
    208		ctx->bio = bio;
    209		ctx->enabled_steps = post_read_steps;
    210		bio->bi_private = ctx;
    211	}
    212}
    213
    214static inline loff_t ext4_readpage_limit(struct inode *inode)
    215{
    216	if (IS_ENABLED(CONFIG_FS_VERITY) &&
    217	    (IS_VERITY(inode) || ext4_verity_in_progress(inode)))
    218		return inode->i_sb->s_maxbytes;
    219
    220	return i_size_read(inode);
    221}
    222
/*
 * Read one page, or a batch of readahead pages, of @inode using multi-page
 * bios.
 *
 * @inode: inode the pages belong to.
 * @rac:   readahead control, or NULL.  When non-NULL, readahead_count(rac)
 *         pages are processed, each obtained with readahead_page(), and the
 *         incoming @page value is not used.
 * @page:  the single page to read when @rac is NULL.
 *
 * For every page the file blocks are mapped with ext4_map_blocks().  Pages
 * whose blocks are physically contiguous are added to a bio, and adjacent
 * pages share a bio when their blocks follow on from each other.  Pages that
 * already have buffers, contain a mapped block after a hole, or map to
 * non-contiguous blocks take the "confused" path and are read with
 * block_read_full_folio() instead.
 *
 * Always returns 0.  A page that fails mapping or verity verification gets
 * PG_error set, is zeroed and unlocked.
 */
int ext4_mpage_readpages(struct inode *inode,
		struct readahead_control *rac, struct page *page)
{
	/* bio currently being built, or NULL when none is open */
	struct bio *bio = NULL;
	/* physical block of the last page added to @bio, for merge checks */
	sector_t last_block_in_bio = 0;

	const unsigned blkbits = inode->i_blkbits;
	const unsigned blocks_per_page = PAGE_SIZE >> blkbits;
	const unsigned blocksize = 1 << blkbits;
	/* first file block of the current page (passed to the fscrypt helpers) */
	sector_t next_block;
	/* file block currently being mapped */
	sector_t block_in_file;
	/* exclusive upper bound (file block) for mapping requests */
	sector_t last_block;
	sector_t last_block_in_file;
	/* physical block number of each mapped block in the current page */
	sector_t blocks[MAX_BUF_PER_PAGE];
	/* index of the next block within the current page */
	unsigned page_block;
	struct block_device *bdev = inode->i_sb->s_bdev;
	int length;
	unsigned relative_block = 0;
	/* last ext4_map_blocks() result; kept across pages so it can be reused */
	struct ext4_map_blocks map;
	unsigned int nr_pages = rac ? readahead_count(rac) : 1;

	map.m_pblk = 0;
	map.m_lblk = 0;
	map.m_len = 0;
	map.m_flags = 0;

	for (; nr_pages; nr_pages--) {
		int fully_mapped = 1;
		/* page-relative index of the first hole; blocks_per_page == none */
		unsigned first_hole = blocks_per_page;

		if (rac) {
			/* readahead mode: fetch the next page of the batch */
			page = readahead_page(rac);
			prefetchw(&page->flags);
		}

		/* pages that already carry buffers go to the buffer_head path */
		if (page_has_buffers(page))
			goto confused;

		block_in_file = next_block =
			(sector_t)page->index << (PAGE_SHIFT - blkbits);
		/* map at most the remaining pages, clamped to the read limit */
		last_block = block_in_file + nr_pages * blocks_per_page;
		last_block_in_file = (ext4_readpage_limit(inode) +
				      blocksize - 1) >> blkbits;
		if (last_block > last_block_in_file)
			last_block = last_block_in_file;
		page_block = 0;

		/*
		 * Map blocks using the previous result first.
		 */
		if ((map.m_flags & EXT4_MAP_MAPPED) &&
		    block_in_file > map.m_lblk &&
		    block_in_file < (map.m_lblk + map.m_len)) {
			/* offset of this page's first block inside the old map */
			unsigned map_offset = block_in_file - map.m_lblk;
			/* blocks of the old map still unused from that offset */
			unsigned last = map.m_len - map_offset;

			for (relative_block = 0; ; relative_block++) {
				if (relative_block == last) {
					/* needed? */
					map.m_flags &= ~EXT4_MAP_MAPPED;
					break;
				}
				if (page_block == blocks_per_page)
					break;
				blocks[page_block] = map.m_pblk + map_offset +
					relative_block;
				page_block++;
				block_in_file++;
			}
		}

		/*
		 * Then do more ext4_map_blocks() calls until we are
		 * done with this page.
		 */
		while (page_block < blocks_per_page) {
			if (block_in_file < last_block) {
				map.m_lblk = block_in_file;
				map.m_len = last_block - block_in_file;

				if (ext4_map_blocks(NULL, inode, &map, 0) < 0) {
				/*
				 * Error exit for this page (also the target
				 * of the verity failure goto below): flag
				 * the page, zero it, unlock it and move on.
				 */
				set_error_page:
					SetPageError(page);
					zero_user_segment(page, 0,
							  PAGE_SIZE);
					unlock_page(page);
					goto next_page;
				}
			}
			if ((map.m_flags & EXT4_MAP_MAPPED) == 0) {
				/* unmapped block: record the first hole, keep going */
				fully_mapped = 0;
				if (first_hole == blocks_per_page)
					first_hole = page_block;
				page_block++;
				block_in_file++;
				continue;
			}
			if (first_hole != blocks_per_page)
				goto confused;		/* hole -> non-hole */

			/* Contiguous blocks? */
			if (page_block && blocks[page_block-1] != map.m_pblk-1)
				goto confused;
			/* copy as much of the new mapping as fits in this page */
			for (relative_block = 0; ; relative_block++) {
				if (relative_block == map.m_len) {
					/* needed? */
					map.m_flags &= ~EXT4_MAP_MAPPED;
					break;
				} else if (page_block == blocks_per_page)
					break;
				blocks[page_block] = map.m_pblk+relative_block;
				page_block++;
				block_in_file++;
			}
		}
		if (first_hole != blocks_per_page) {
			/* zero everything from the first hole to the end of the page */
			zero_user_segment(page, first_hole << blkbits,
					  PAGE_SIZE);
			if (first_hole == 0) {
				/*
				 * The whole page is a hole: no I/O needed.
				 * Verify the zeroed page if verity applies,
				 * then finish the page right here.
				 */
				if (ext4_need_verity(inode, page->index) &&
				    !fsverity_verify_page(page))
					goto set_error_page;
				SetPageUptodate(page);
				unlock_page(page);
				goto next_page;
			}
		} else if (fully_mapped) {
			SetPageMappedToDisk(page);
		}

		/*
		 * This page will go to BIO.  Do we need to send this
		 * BIO off first?
		 */
		if (bio && (last_block_in_bio != blocks[0] - 1 ||
			    !fscrypt_mergeable_bio(bio, inode, next_block))) {
		/* also reached by goto when bio_add_page() below falls short */
		submit_and_realloc:
			submit_bio(bio);
			bio = NULL;
		}
		if (bio == NULL) {
			/*
			 * bio_alloc will _always_ be able to allocate a bio if
			 * __GFP_DIRECT_RECLAIM is set, see bio_alloc_bioset().
			 */
			bio = bio_alloc(bdev, bio_max_segs(nr_pages),
					REQ_OP_READ, GFP_KERNEL);
			fscrypt_set_bio_crypt_ctx(bio, inode, next_block,
						  GFP_KERNEL);
			ext4_set_bio_post_read_ctx(bio, inode, page->index);
			/* start sector: first physical block in 512-byte units */
			bio->bi_iter.bi_sector = blocks[0] << (blkbits - 9);
			bio->bi_end_io = mpage_end_io;
			if (rac)
				bio->bi_opf |= REQ_RAHEAD;
		}

		/* only the part of the page before the first hole is read */
		length = first_hole << blkbits;
		/* page did not fit: submit this bio and retry with a fresh one */
		if (bio_add_page(bio, page, length, 0) < length)
			goto submit_and_realloc;

		/*
		 * Submit right away when the map is flagged EXT4_MAP_BOUNDARY
		 * and was consumed completely, or when the page contains a
		 * hole.  Otherwise remember the page's last physical block so
		 * the next page can be checked for contiguity.
		 */
		if (((map.m_flags & EXT4_MAP_BOUNDARY) &&
		     (relative_block == map.m_len)) ||
		    (first_hole != blocks_per_page)) {
			submit_bio(bio);
			bio = NULL;
		} else
			last_block_in_bio = blocks[blocks_per_page - 1];
		goto next_page;
	/*
	 * Fallback: flush any bio under construction, then read the page
	 * through block_read_full_folio() unless it is already uptodate.
	 */
	confused:
		if (bio) {
			submit_bio(bio);
			bio = NULL;
		}
		if (!PageUptodate(page))
			block_read_full_folio(page_folio(page), ext4_get_block);
		else
			unlock_page(page);
	next_page:
		/* pages obtained via readahead_page() are released here */
		if (rac)
			put_page(page);
	}
	/* submit whatever is still batched */
	if (bio)
		submit_bio(bio);
	return 0;
}
    408
    409int __init ext4_init_post_read_processing(void)
    410{
    411	bio_post_read_ctx_cache =
    412		kmem_cache_create("ext4_bio_post_read_ctx",
    413				  sizeof(struct bio_post_read_ctx), 0, 0, NULL);
    414	if (!bio_post_read_ctx_cache)
    415		goto fail;
    416	bio_post_read_ctx_pool =
    417		mempool_create_slab_pool(NUM_PREALLOC_POST_READ_CTXS,
    418					 bio_post_read_ctx_cache);
    419	if (!bio_post_read_ctx_pool)
    420		goto fail_free_cache;
    421	return 0;
    422
    423fail_free_cache:
    424	kmem_cache_destroy(bio_post_read_ctx_cache);
    425fail:
    426	return -ENOMEM;
    427}
    428
/*
 * Tear down what ext4_init_post_read_processing() created.  The mempool is
 * destroyed before the slab cache it was created on.
 */
void ext4_exit_post_read_processing(void)
{
	mempool_destroy(bio_post_read_ctx_pool);
	kmem_cache_destroy(bio_post_read_ctx_cache);
}