block_validity.c - cachepc-linux - Fork of AMDESE/linux with modifications for CachePC side-channel attack

	cachepc-linux Fork of AMDESE/linux with modifications for CachePC side-channel attack
	git clone https://git.sinitax.com/sinitax/cachepc-linux
	Log \| Files \| Refs \| README \| LICENSE \| sfeed.txt
block_validity.c (9766B)
      1// SPDX-License-Identifier: GPL-2.0
      2/*
      3 *  linux/fs/ext4/block_validity.c
      4 *
      5 * Copyright (C) 2009
      6 * Theodore Ts'o (tytso@mit.edu)
      7 *
      8 * Track which blocks in the filesystem are metadata blocks that
      9 * should never be used as data blocks by files or directories.
     10 */
     11
     12#include <linux/time.h>
     13#include <linux/fs.h>
     14#include <linux/namei.h>
     15#include <linux/quotaops.h>
     16#include <linux/buffer_head.h>
     17#include <linux/swap.h>
     18#include <linux/pagemap.h>
     19#include <linux/blkdev.h>
     20#include <linux/slab.h>
     21#include "ext4.h"
     22
     23struct ext4_system_zone {
     24	struct rb_node	node;
     25	ext4_fsblk_t	start_blk;
     26	unsigned int	count;
     27	u32		ino;
     28};
     29
     30static struct kmem_cache *ext4_system_zone_cachep;
     31
     32int __init ext4_init_system_zone(void)
     33{
     34	ext4_system_zone_cachep = KMEM_CACHE(ext4_system_zone, 0);
     35	if (ext4_system_zone_cachep == NULL)
     36		return -ENOMEM;
     37	return 0;
     38}
     39
     40void ext4_exit_system_zone(void)
     41{
     42	rcu_barrier();
     43	kmem_cache_destroy(ext4_system_zone_cachep);
     44}
     45
     46static inline int can_merge(struct ext4_system_zone *entry1,
     47		     struct ext4_system_zone *entry2)
     48{
     49	if ((entry1->start_blk + entry1->count) == entry2->start_blk &&
     50	    entry1->ino == entry2->ino)
     51		return 1;
     52	return 0;
     53}
     54
     55static void release_system_zone(struct ext4_system_blocks *system_blks)
     56{
     57	struct ext4_system_zone	*entry, *n;
     58
     59	rbtree_postorder_for_each_entry_safe(entry, n,
     60				&system_blks->root, node)
     61		kmem_cache_free(ext4_system_zone_cachep, entry);
     62}
     63
     64/*
     65 * Mark a range of blocks as belonging to the "system zone" --- that
     66 * is, filesystem metadata blocks which should never be used by
     67 * inodes.
     68 */
     69static int add_system_zone(struct ext4_system_blocks *system_blks,
     70			   ext4_fsblk_t start_blk,
     71			   unsigned int count, u32 ino)
     72{
     73	struct ext4_system_zone *new_entry, *entry;
     74	struct rb_node **n = &system_blks->root.rb_node, *node;
     75	struct rb_node *parent = NULL, *new_node = NULL;
     76
     77	while (*n) {
     78		parent = *n;
     79		entry = rb_entry(parent, struct ext4_system_zone, node);
     80		if (start_blk < entry->start_blk)
     81			n = &(*n)->rb_left;
     82		else if (start_blk >= (entry->start_blk + entry->count))
     83			n = &(*n)->rb_right;
     84		else	/* Unexpected overlap of system zones. */
     85			return -EFSCORRUPTED;
     86	}
     87
     88	new_entry = kmem_cache_alloc(ext4_system_zone_cachep,
     89				     GFP_KERNEL);
     90	if (!new_entry)
     91		return -ENOMEM;
     92	new_entry->start_blk = start_blk;
     93	new_entry->count = count;
     94	new_entry->ino = ino;
     95	new_node = &new_entry->node;
     96
     97	rb_link_node(new_node, parent, n);
     98	rb_insert_color(new_node, &system_blks->root);
     99
    100	/* Can we merge to the left? */
    101	node = rb_prev(new_node);
    102	if (node) {
    103		entry = rb_entry(node, struct ext4_system_zone, node);
    104		if (can_merge(entry, new_entry)) {
    105			new_entry->start_blk = entry->start_blk;
    106			new_entry->count += entry->count;
    107			rb_erase(node, &system_blks->root);
    108			kmem_cache_free(ext4_system_zone_cachep, entry);
    109		}
    110	}
    111
    112	/* Can we merge to the right? */
    113	node = rb_next(new_node);
    114	if (node) {
    115		entry = rb_entry(node, struct ext4_system_zone, node);
    116		if (can_merge(new_entry, entry)) {
    117			new_entry->count += entry->count;
    118			rb_erase(node, &system_blks->root);
    119			kmem_cache_free(ext4_system_zone_cachep, entry);
    120		}
    121	}
    122	return 0;
    123}
    124
    125static void debug_print_tree(struct ext4_sb_info *sbi)
    126{
    127	struct rb_node *node;
    128	struct ext4_system_zone *entry;
    129	struct ext4_system_blocks *system_blks;
    130	int first = 1;
    131
    132	printk(KERN_INFO "System zones: ");
    133	rcu_read_lock();
    134	system_blks = rcu_dereference(sbi->s_system_blks);
    135	node = rb_first(&system_blks->root);
    136	while (node) {
    137		entry = rb_entry(node, struct ext4_system_zone, node);
    138		printk(KERN_CONT "%s%llu-%llu", first ? "" : ", ",
    139		       entry->start_blk, entry->start_blk + entry->count - 1);
    140		first = 0;
    141		node = rb_next(node);
    142	}
    143	rcu_read_unlock();
    144	printk(KERN_CONT "\n");
    145}
    146
    147static int ext4_protect_reserved_inode(struct super_block *sb,
    148				       struct ext4_system_blocks *system_blks,
    149				       u32 ino)
    150{
    151	struct inode *inode;
    152	struct ext4_sb_info *sbi = EXT4_SB(sb);
    153	struct ext4_map_blocks map;
    154	u32 i = 0, num;
    155	int err = 0, n;
    156
    157	if ((ino < EXT4_ROOT_INO) ||
    158	    (ino > le32_to_cpu(sbi->s_es->s_inodes_count)))
    159		return -EINVAL;
    160	inode = ext4_iget(sb, ino, EXT4_IGET_SPECIAL);
    161	if (IS_ERR(inode))
    162		return PTR_ERR(inode);
    163	num = (inode->i_size + sb->s_blocksize - 1) >> sb->s_blocksize_bits;
    164	while (i < num) {
    165		cond_resched();
    166		map.m_lblk = i;
    167		map.m_len = num - i;
    168		n = ext4_map_blocks(NULL, inode, &map, 0);
    169		if (n < 0) {
    170			err = n;
    171			break;
    172		}
    173		if (n == 0) {
    174			i++;
    175		} else {
    176			err = add_system_zone(system_blks, map.m_pblk, n, ino);
    177			if (err < 0) {
    178				if (err == -EFSCORRUPTED) {
    179					EXT4_ERROR_INODE_ERR(inode, -err,
    180						"blocks %llu-%llu from inode overlap system zone",
    181						map.m_pblk,
    182						map.m_pblk + map.m_len - 1);
    183				}
    184				break;
    185			}
    186			i += n;
    187		}
    188	}
    189	iput(inode);
    190	return err;
    191}
    192
    193static void ext4_destroy_system_zone(struct rcu_head *rcu)
    194{
    195	struct ext4_system_blocks *system_blks;
    196
    197	system_blks = container_of(rcu, struct ext4_system_blocks, rcu);
    198	release_system_zone(system_blks);
    199	kfree(system_blks);
    200}
    201
    202/*
    203 * Build system zone rbtree which is used for block validity checking.
    204 *
    205 * The update of system_blks pointer in this function is protected by
    206 * sb->s_umount semaphore. However we have to be careful as we can be
    207 * racing with ext4_inode_block_valid() calls reading system_blks rbtree
    208 * protected only by RCU. That's why we first build the rbtree and then
    209 * swap it in place.
    210 */
    211int ext4_setup_system_zone(struct super_block *sb)
    212{
    213	ext4_group_t ngroups = ext4_get_groups_count(sb);
    214	struct ext4_sb_info *sbi = EXT4_SB(sb);
    215	struct ext4_system_blocks *system_blks;
    216	struct ext4_group_desc *gdp;
    217	ext4_group_t i;
    218	int flex_size = ext4_flex_bg_size(sbi);
    219	int ret;
    220
    221	system_blks = kzalloc(sizeof(*system_blks), GFP_KERNEL);
    222	if (!system_blks)
    223		return -ENOMEM;
    224
    225	for (i=0; i < ngroups; i++) {
    226		cond_resched();
    227		if (ext4_bg_has_super(sb, i) &&
    228		    ((i < 5) || ((i % flex_size) == 0))) {
    229			ret = add_system_zone(system_blks,
    230					ext4_group_first_block_no(sb, i),
    231					ext4_bg_num_gdb(sb, i) + 1, 0);
    232			if (ret)
    233				goto err;
    234		}
    235		gdp = ext4_get_group_desc(sb, i, NULL);
    236		ret = add_system_zone(system_blks,
    237				ext4_block_bitmap(sb, gdp), 1, 0);
    238		if (ret)
    239			goto err;
    240		ret = add_system_zone(system_blks,
    241				ext4_inode_bitmap(sb, gdp), 1, 0);
    242		if (ret)
    243			goto err;
    244		ret = add_system_zone(system_blks,
    245				ext4_inode_table(sb, gdp),
    246				sbi->s_itb_per_group, 0);
    247		if (ret)
    248			goto err;
    249	}
    250	if (ext4_has_feature_journal(sb) && sbi->s_es->s_journal_inum) {
    251		ret = ext4_protect_reserved_inode(sb, system_blks,
    252				le32_to_cpu(sbi->s_es->s_journal_inum));
    253		if (ret)
    254			goto err;
    255	}
    256
    257	/*
    258	 * System blks rbtree complete, announce it once to prevent racing
    259	 * with ext4_inode_block_valid() accessing the rbtree at the same
    260	 * time.
    261	 */
    262	rcu_assign_pointer(sbi->s_system_blks, system_blks);
    263
    264	if (test_opt(sb, DEBUG))
    265		debug_print_tree(sbi);
    266	return 0;
    267err:
    268	release_system_zone(system_blks);
    269	kfree(system_blks);
    270	return ret;
    271}
    272
    273/*
    274 * Called when the filesystem is unmounted or when remounting it with
    275 * noblock_validity specified.
    276 *
    277 * The update of system_blks pointer in this function is protected by
    278 * sb->s_umount semaphore. However we have to be careful as we can be
    279 * racing with ext4_inode_block_valid() calls reading system_blks rbtree
    280 * protected only by RCU. So we first clear the system_blks pointer and
    281 * then free the rbtree only after RCU grace period expires.
    282 */
    283void ext4_release_system_zone(struct super_block *sb)
    284{
    285	struct ext4_system_blocks *system_blks;
    286
    287	system_blks = rcu_dereference_protected(EXT4_SB(sb)->s_system_blks,
    288					lockdep_is_held(&sb->s_umount));
    289	rcu_assign_pointer(EXT4_SB(sb)->s_system_blks, NULL);
    290
    291	if (system_blks)
    292		call_rcu(&system_blks->rcu, ext4_destroy_system_zone);
    293}
    294
    295int ext4_sb_block_valid(struct super_block *sb, struct inode *inode,
    296				ext4_fsblk_t start_blk, unsigned int count)
    297{
    298	struct ext4_sb_info *sbi = EXT4_SB(sb);
    299	struct ext4_system_blocks *system_blks;
    300	struct ext4_system_zone *entry;
    301	struct rb_node *n;
    302	int ret = 1;
    303
    304	if ((start_blk <= le32_to_cpu(sbi->s_es->s_first_data_block)) ||
    305	    (start_blk + count < start_blk) ||
    306	    (start_blk + count > ext4_blocks_count(sbi->s_es)))
    307		return 0;
    308
    309	/*
    310	 * Lock the system zone to prevent it being released concurrently
    311	 * when doing a remount which inverse current "[no]block_validity"
    312	 * mount option.
    313	 */
    314	rcu_read_lock();
    315	system_blks = rcu_dereference(sbi->s_system_blks);
    316	if (system_blks == NULL)
    317		goto out_rcu;
    318
    319	n = system_blks->root.rb_node;
    320	while (n) {
    321		entry = rb_entry(n, struct ext4_system_zone, node);
    322		if (start_blk + count - 1 < entry->start_blk)
    323			n = n->rb_left;
    324		else if (start_blk >= (entry->start_blk + entry->count))
    325			n = n->rb_right;
    326		else {
    327			ret = 0;
    328			if (inode)
    329				ret = (entry->ino == inode->i_ino);
    330			break;
    331		}
    332	}
    333out_rcu:
    334	rcu_read_unlock();
    335	return ret;
    336}
    337
    338/*
    339 * Returns 1 if the passed-in block region (start_blk,
    340 * start_blk+count) is valid; 0 if some part of the block region
    341 * overlaps with some other filesystem metadata blocks.
    342 */
    343int ext4_inode_block_valid(struct inode *inode, ext4_fsblk_t start_blk,
    344			  unsigned int count)
    345{
    346	return ext4_sb_block_valid(inode->i_sb, inode, start_blk, count);
    347}
    348
    349int ext4_check_blockref(const char *function, unsigned int line,
    350			struct inode *inode, __le32 *p, unsigned int max)
    351{
    352	__le32 *bref = p;
    353	unsigned int blk;
    354
    355	if (ext4_has_feature_journal(inode->i_sb) &&
    356	    (inode->i_ino ==
    357	     le32_to_cpu(EXT4_SB(inode->i_sb)->s_es->s_journal_inum)))
    358		return 0;
    359
    360	while (bref < p+max) {
    361		blk = le32_to_cpu(*bref++);
    362		if (blk &&
    363		    unlikely(!ext4_inode_block_valid(inode, blk, 1))) {
    364			ext4_error_inode(inode, function, line, blk,
    365					 "invalid block");
    366			return -EFSCORRUPTED;
    367		}
    368	}
    369	return 0;
    370}
    371