cachepc-linux

Fork of AMDESE/linux with modifications for CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux
Log | Files | Refs | README | LICENSE | sfeed.txt

extent_tree.c (15020B)


      1// SPDX-License-Identifier: GPL-2.0
      2/*
      3 * Copyright (c) 2014-2016 Christoph Hellwig.
      4 */
      5
      6#include <linux/vmalloc.h>
      7
      8#include "blocklayout.h"
      9
     10#define NFSDBG_FACILITY		NFSDBG_PNFS_LD
     11
     12static inline struct pnfs_block_extent *
     13ext_node(struct rb_node *node)
     14{
     15	return rb_entry(node, struct pnfs_block_extent, be_node);
     16}
     17
     18static struct pnfs_block_extent *
     19ext_tree_first(struct rb_root *root)
     20{
     21	struct rb_node *node = rb_first(root);
     22	return node ? ext_node(node) : NULL;
     23}
     24
     25static struct pnfs_block_extent *
     26ext_tree_prev(struct pnfs_block_extent *be)
     27{
     28	struct rb_node *node = rb_prev(&be->be_node);
     29	return node ? ext_node(node) : NULL;
     30}
     31
     32static struct pnfs_block_extent *
     33ext_tree_next(struct pnfs_block_extent *be)
     34{
     35	struct rb_node *node = rb_next(&be->be_node);
     36	return node ? ext_node(node) : NULL;
     37}
     38
     39static inline sector_t
     40ext_f_end(struct pnfs_block_extent *be)
     41{
     42	return be->be_f_offset + be->be_length;
     43}
     44
     45static struct pnfs_block_extent *
     46__ext_tree_search(struct rb_root *root, sector_t start)
     47{
     48	struct rb_node *node = root->rb_node;
     49	struct pnfs_block_extent *be = NULL;
     50
     51	while (node) {
     52		be = ext_node(node);
     53		if (start < be->be_f_offset)
     54			node = node->rb_left;
     55		else if (start >= ext_f_end(be))
     56			node = node->rb_right;
     57		else
     58			return be;
     59	}
     60
     61	if (be) {
     62		if (start < be->be_f_offset)
     63			return be;
     64
     65		if (start >= ext_f_end(be))
     66			return ext_tree_next(be);
     67	}
     68
     69	return NULL;
     70}
     71
     72static bool
     73ext_can_merge(struct pnfs_block_extent *be1, struct pnfs_block_extent *be2)
     74{
     75	if (be1->be_state != be2->be_state)
     76		return false;
     77	if (be1->be_device != be2->be_device)
     78		return false;
     79
     80	if (be1->be_f_offset + be1->be_length != be2->be_f_offset)
     81		return false;
     82
     83	if (be1->be_state != PNFS_BLOCK_NONE_DATA &&
     84	    (be1->be_v_offset + be1->be_length != be2->be_v_offset))
     85		return false;
     86
     87	if (be1->be_state == PNFS_BLOCK_INVALID_DATA &&
     88	    be1->be_tag != be2->be_tag)
     89		return false;
     90
     91	return true;
     92}
     93
     94static struct pnfs_block_extent *
     95ext_try_to_merge_left(struct rb_root *root, struct pnfs_block_extent *be)
     96{
     97	struct pnfs_block_extent *left = ext_tree_prev(be);
     98
     99	if (left && ext_can_merge(left, be)) {
    100		left->be_length += be->be_length;
    101		rb_erase(&be->be_node, root);
    102		nfs4_put_deviceid_node(be->be_device);
    103		kfree(be);
    104		return left;
    105	}
    106
    107	return be;
    108}
    109
    110static struct pnfs_block_extent *
    111ext_try_to_merge_right(struct rb_root *root, struct pnfs_block_extent *be)
    112{
    113	struct pnfs_block_extent *right = ext_tree_next(be);
    114
    115	if (right && ext_can_merge(be, right)) {
    116		be->be_length += right->be_length;
    117		rb_erase(&right->be_node, root);
    118		nfs4_put_deviceid_node(right->be_device);
    119		kfree(right);
    120	}
    121
    122	return be;
    123}
    124
    125static void __ext_put_deviceids(struct list_head *head)
    126{
    127	struct pnfs_block_extent *be, *tmp;
    128
    129	list_for_each_entry_safe(be, tmp, head, be_list) {
    130		nfs4_put_deviceid_node(be->be_device);
    131		kfree(be);
    132	}
    133}
    134
    135static void
    136__ext_tree_insert(struct rb_root *root,
    137		struct pnfs_block_extent *new, bool merge_ok)
    138{
    139	struct rb_node **p = &root->rb_node, *parent = NULL;
    140	struct pnfs_block_extent *be;
    141
    142	while (*p) {
    143		parent = *p;
    144		be = ext_node(parent);
    145
    146		if (new->be_f_offset < be->be_f_offset) {
    147			if (merge_ok && ext_can_merge(new, be)) {
    148				be->be_f_offset = new->be_f_offset;
    149				if (be->be_state != PNFS_BLOCK_NONE_DATA)
    150					be->be_v_offset = new->be_v_offset;
    151				be->be_length += new->be_length;
    152				be = ext_try_to_merge_left(root, be);
    153				goto free_new;
    154			}
    155			p = &(*p)->rb_left;
    156		} else if (new->be_f_offset >= ext_f_end(be)) {
    157			if (merge_ok && ext_can_merge(be, new)) {
    158				be->be_length += new->be_length;
    159				be = ext_try_to_merge_right(root, be);
    160				goto free_new;
    161			}
    162			p = &(*p)->rb_right;
    163		} else {
    164			BUG();
    165		}
    166	}
    167
    168	rb_link_node(&new->be_node, parent, p);
    169	rb_insert_color(&new->be_node, root);
    170	return;
    171free_new:
    172	nfs4_put_deviceid_node(new->be_device);
    173	kfree(new);
    174}
    175
    176static int
    177__ext_tree_remove(struct rb_root *root,
    178		sector_t start, sector_t end, struct list_head *tmp)
    179{
    180	struct pnfs_block_extent *be;
    181	sector_t len1 = 0, len2 = 0;
    182	sector_t orig_v_offset;
    183	sector_t orig_len;
    184
    185	be = __ext_tree_search(root, start);
    186	if (!be)
    187		return 0;
    188	if (be->be_f_offset >= end)
    189		return 0;
    190
    191	orig_v_offset = be->be_v_offset;
    192	orig_len = be->be_length;
    193
    194	if (start > be->be_f_offset)
    195		len1 = start - be->be_f_offset;
    196	if (ext_f_end(be) > end)
    197		len2 = ext_f_end(be) - end;
    198
    199	if (len2 > 0) {
    200		if (len1 > 0) {
    201			struct pnfs_block_extent *new;
    202
    203			new = kzalloc(sizeof(*new), GFP_ATOMIC);
    204			if (!new)
    205				return -ENOMEM;
    206
    207			be->be_length = len1;
    208
    209			new->be_f_offset = end;
    210			if (be->be_state != PNFS_BLOCK_NONE_DATA) {
    211				new->be_v_offset =
    212					orig_v_offset + orig_len - len2;
    213			}
    214			new->be_length = len2;
    215			new->be_state = be->be_state;
    216			new->be_tag = be->be_tag;
    217			new->be_device = nfs4_get_deviceid(be->be_device);
    218
    219			__ext_tree_insert(root, new, true);
    220		} else {
    221			be->be_f_offset = end;
    222			if (be->be_state != PNFS_BLOCK_NONE_DATA) {
    223				be->be_v_offset =
    224					orig_v_offset + orig_len - len2;
    225			}
    226			be->be_length = len2;
    227		}
    228	} else {
    229		if (len1 > 0) {
    230			be->be_length = len1;
    231			be = ext_tree_next(be);
    232		}
    233
    234		while (be && ext_f_end(be) <= end) {
    235			struct pnfs_block_extent *next = ext_tree_next(be);
    236
    237			rb_erase(&be->be_node, root);
    238			list_add_tail(&be->be_list, tmp);
    239			be = next;
    240		}
    241
    242		if (be && be->be_f_offset < end) {
    243			len1 = ext_f_end(be) - end;
    244			be->be_f_offset = end;
    245			if (be->be_state != PNFS_BLOCK_NONE_DATA)
    246				be->be_v_offset += be->be_length - len1;
    247			be->be_length = len1;
    248		}
    249	}
    250
    251	return 0;
    252}
    253
    254int
    255ext_tree_insert(struct pnfs_block_layout *bl, struct pnfs_block_extent *new)
    256{
    257	struct pnfs_block_extent *be;
    258	struct rb_root *root;
    259	int err = 0;
    260
    261	switch (new->be_state) {
    262	case PNFS_BLOCK_READWRITE_DATA:
    263	case PNFS_BLOCK_INVALID_DATA:
    264		root = &bl->bl_ext_rw;
    265		break;
    266	case PNFS_BLOCK_READ_DATA:
    267	case PNFS_BLOCK_NONE_DATA:
    268		root = &bl->bl_ext_ro;
    269		break;
    270	default:
    271		dprintk("invalid extent type\n");
    272		return -EINVAL;
    273	}
    274
    275	spin_lock(&bl->bl_ext_lock);
    276retry:
    277	be = __ext_tree_search(root, new->be_f_offset);
    278	if (!be || be->be_f_offset >= ext_f_end(new)) {
    279		__ext_tree_insert(root, new, true);
    280	} else if (new->be_f_offset >= be->be_f_offset) {
    281		if (ext_f_end(new) <= ext_f_end(be)) {
    282			nfs4_put_deviceid_node(new->be_device);
    283			kfree(new);
    284		} else {
    285			sector_t new_len = ext_f_end(new) - ext_f_end(be);
    286			sector_t diff = new->be_length - new_len;
    287
    288			new->be_f_offset += diff;
    289			new->be_v_offset += diff;
    290			new->be_length = new_len;
    291			goto retry;
    292		}
    293	} else if (ext_f_end(new) <= ext_f_end(be)) {
    294		new->be_length = be->be_f_offset - new->be_f_offset;
    295		__ext_tree_insert(root, new, true);
    296	} else {
    297		struct pnfs_block_extent *split;
    298		sector_t new_len = ext_f_end(new) - ext_f_end(be);
    299		sector_t diff = new->be_length - new_len;
    300
    301		split = kmemdup(new, sizeof(*new), GFP_ATOMIC);
    302		if (!split) {
    303			err = -EINVAL;
    304			goto out;
    305		}
    306
    307		split->be_length = be->be_f_offset - split->be_f_offset;
    308		split->be_device = nfs4_get_deviceid(new->be_device);
    309		__ext_tree_insert(root, split, true);
    310
    311		new->be_f_offset += diff;
    312		new->be_v_offset += diff;
    313		new->be_length = new_len;
    314		goto retry;
    315	}
    316out:
    317	spin_unlock(&bl->bl_ext_lock);
    318	return err;
    319}
    320
    321static bool
    322__ext_tree_lookup(struct rb_root *root, sector_t isect,
    323		struct pnfs_block_extent *ret)
    324{
    325	struct rb_node *node;
    326	struct pnfs_block_extent *be;
    327
    328	node = root->rb_node;
    329	while (node) {
    330		be = ext_node(node);
    331		if (isect < be->be_f_offset)
    332			node = node->rb_left;
    333		else if (isect >= ext_f_end(be))
    334			node = node->rb_right;
    335		else {
    336			*ret = *be;
    337			return true;
    338		}
    339	}
    340
    341	return false;
    342}
    343
    344bool
    345ext_tree_lookup(struct pnfs_block_layout *bl, sector_t isect,
    346	    struct pnfs_block_extent *ret, bool rw)
    347{
    348	bool found = false;
    349
    350	spin_lock(&bl->bl_ext_lock);
    351	if (!rw)
    352		found = __ext_tree_lookup(&bl->bl_ext_ro, isect, ret);
    353	if (!found)
    354		found = __ext_tree_lookup(&bl->bl_ext_rw, isect, ret);
    355	spin_unlock(&bl->bl_ext_lock);
    356
    357	return found;
    358}
    359
    360int ext_tree_remove(struct pnfs_block_layout *bl, bool rw,
    361		sector_t start, sector_t end)
    362{
    363	int err, err2;
    364	LIST_HEAD(tmp);
    365
    366	spin_lock(&bl->bl_ext_lock);
    367	err = __ext_tree_remove(&bl->bl_ext_ro, start, end, &tmp);
    368	if (rw) {
    369		err2 = __ext_tree_remove(&bl->bl_ext_rw, start, end, &tmp);
    370		if (!err)
    371			err = err2;
    372	}
    373	spin_unlock(&bl->bl_ext_lock);
    374
    375	__ext_put_deviceids(&tmp);
    376	return err;
    377}
    378
    379static int
    380ext_tree_split(struct rb_root *root, struct pnfs_block_extent *be,
    381		sector_t split)
    382{
    383	struct pnfs_block_extent *new;
    384	sector_t orig_len = be->be_length;
    385
    386	new = kzalloc(sizeof(*new), GFP_ATOMIC);
    387	if (!new)
    388		return -ENOMEM;
    389
    390	be->be_length = split - be->be_f_offset;
    391
    392	new->be_f_offset = split;
    393	if (be->be_state != PNFS_BLOCK_NONE_DATA)
    394		new->be_v_offset = be->be_v_offset + be->be_length;
    395	new->be_length = orig_len - be->be_length;
    396	new->be_state = be->be_state;
    397	new->be_tag = be->be_tag;
    398	new->be_device = nfs4_get_deviceid(be->be_device);
    399
    400	__ext_tree_insert(root, new, false);
    401	return 0;
    402}
    403
    404int
    405ext_tree_mark_written(struct pnfs_block_layout *bl, sector_t start,
    406		sector_t len, u64 lwb)
    407{
    408	struct rb_root *root = &bl->bl_ext_rw;
    409	sector_t end = start + len;
    410	struct pnfs_block_extent *be;
    411	int err = 0;
    412	LIST_HEAD(tmp);
    413
    414	spin_lock(&bl->bl_ext_lock);
    415	/*
    416	 * First remove all COW extents or holes from written to range.
    417	 */
    418	err = __ext_tree_remove(&bl->bl_ext_ro, start, end, &tmp);
    419	if (err)
    420		goto out;
    421
    422	/*
    423	 * Then mark all invalid extents in the range as written to.
    424	 */
    425	for (be = __ext_tree_search(root, start); be; be = ext_tree_next(be)) {
    426		if (be->be_f_offset >= end)
    427			break;
    428
    429		if (be->be_state != PNFS_BLOCK_INVALID_DATA || be->be_tag)
    430			continue;
    431
    432		if (be->be_f_offset < start) {
    433			struct pnfs_block_extent *left = ext_tree_prev(be);
    434
    435			if (left && ext_can_merge(left, be)) {
    436				sector_t diff = start - be->be_f_offset;
    437
    438				left->be_length += diff;
    439
    440				be->be_f_offset += diff;
    441				be->be_v_offset += diff;
    442				be->be_length -= diff;
    443			} else {
    444				err = ext_tree_split(root, be, start);
    445				if (err)
    446					goto out;
    447			}
    448		}
    449
    450		if (ext_f_end(be) > end) {
    451			struct pnfs_block_extent *right = ext_tree_next(be);
    452
    453			if (right && ext_can_merge(be, right)) {
    454				sector_t diff = end - be->be_f_offset;
    455
    456				be->be_length -= diff;
    457
    458				right->be_f_offset -= diff;
    459				right->be_v_offset -= diff;
    460				right->be_length += diff;
    461			} else {
    462				err = ext_tree_split(root, be, end);
    463				if (err)
    464					goto out;
    465			}
    466		}
    467
    468		if (be->be_f_offset >= start && ext_f_end(be) <= end) {
    469			be->be_tag = EXTENT_WRITTEN;
    470			be = ext_try_to_merge_left(root, be);
    471			be = ext_try_to_merge_right(root, be);
    472		}
    473	}
    474out:
    475	if (bl->bl_lwb < lwb)
    476		bl->bl_lwb = lwb;
    477	spin_unlock(&bl->bl_ext_lock);
    478
    479	__ext_put_deviceids(&tmp);
    480	return err;
    481}
    482
    483static size_t ext_tree_layoutupdate_size(struct pnfs_block_layout *bl, size_t count)
    484{
    485	if (bl->bl_scsi_layout)
    486		return sizeof(__be32) + PNFS_SCSI_RANGE_SIZE * count;
    487	else
    488		return sizeof(__be32) + PNFS_BLOCK_EXTENT_SIZE * count;
    489}
    490
    491static void ext_tree_free_commitdata(struct nfs4_layoutcommit_args *arg,
    492		size_t buffer_size)
    493{
    494	if (arg->layoutupdate_pages != &arg->layoutupdate_page) {
    495		int nr_pages = DIV_ROUND_UP(buffer_size, PAGE_SIZE), i;
    496
    497		for (i = 0; i < nr_pages; i++)
    498			put_page(arg->layoutupdate_pages[i]);
    499		vfree(arg->start_p);
    500		kfree(arg->layoutupdate_pages);
    501	} else {
    502		put_page(arg->layoutupdate_page);
    503	}
    504}
    505
    506static __be32 *encode_block_extent(struct pnfs_block_extent *be, __be32 *p)
    507{
    508	p = xdr_encode_opaque_fixed(p, be->be_device->deviceid.data,
    509			NFS4_DEVICEID4_SIZE);
    510	p = xdr_encode_hyper(p, be->be_f_offset << SECTOR_SHIFT);
    511	p = xdr_encode_hyper(p, be->be_length << SECTOR_SHIFT);
    512	p = xdr_encode_hyper(p, 0LL);
    513	*p++ = cpu_to_be32(PNFS_BLOCK_READWRITE_DATA);
    514	return p;
    515}
    516
    517static __be32 *encode_scsi_range(struct pnfs_block_extent *be, __be32 *p)
    518{
    519	p = xdr_encode_hyper(p, be->be_f_offset << SECTOR_SHIFT);
    520	return xdr_encode_hyper(p, be->be_length << SECTOR_SHIFT);
    521}
    522
    523static int ext_tree_encode_commit(struct pnfs_block_layout *bl, __be32 *p,
    524		size_t buffer_size, size_t *count, __u64 *lastbyte)
    525{
    526	struct pnfs_block_extent *be;
    527	int ret = 0;
    528
    529	spin_lock(&bl->bl_ext_lock);
    530	for (be = ext_tree_first(&bl->bl_ext_rw); be; be = ext_tree_next(be)) {
    531		if (be->be_state != PNFS_BLOCK_INVALID_DATA ||
    532		    be->be_tag != EXTENT_WRITTEN)
    533			continue;
    534
    535		(*count)++;
    536		if (ext_tree_layoutupdate_size(bl, *count) > buffer_size) {
    537			/* keep counting.. */
    538			ret = -ENOSPC;
    539			continue;
    540		}
    541
    542		if (bl->bl_scsi_layout)
    543			p = encode_scsi_range(be, p);
    544		else
    545			p = encode_block_extent(be, p);
    546		be->be_tag = EXTENT_COMMITTING;
    547	}
    548	*lastbyte = bl->bl_lwb - 1;
    549	bl->bl_lwb = 0;
    550	spin_unlock(&bl->bl_ext_lock);
    551
    552	return ret;
    553}
    554
    555int
    556ext_tree_prepare_commit(struct nfs4_layoutcommit_args *arg)
    557{
    558	struct pnfs_block_layout *bl = BLK_LO2EXT(NFS_I(arg->inode)->layout);
    559	size_t count = 0, buffer_size = PAGE_SIZE;
    560	__be32 *start_p;
    561	int ret;
    562
    563	dprintk("%s enter\n", __func__);
    564
    565	arg->layoutupdate_page = alloc_page(GFP_NOFS);
    566	if (!arg->layoutupdate_page)
    567		return -ENOMEM;
    568	start_p = page_address(arg->layoutupdate_page);
    569	arg->layoutupdate_pages = &arg->layoutupdate_page;
    570
    571retry:
    572	ret = ext_tree_encode_commit(bl, start_p + 1, buffer_size, &count, &arg->lastbytewritten);
    573	if (unlikely(ret)) {
    574		ext_tree_free_commitdata(arg, buffer_size);
    575
    576		buffer_size = ext_tree_layoutupdate_size(bl, count);
    577		count = 0;
    578
    579		arg->layoutupdate_pages =
    580			kcalloc(DIV_ROUND_UP(buffer_size, PAGE_SIZE),
    581				sizeof(struct page *), GFP_NOFS);
    582		if (!arg->layoutupdate_pages)
    583			return -ENOMEM;
    584
    585		start_p = __vmalloc(buffer_size, GFP_NOFS);
    586		if (!start_p) {
    587			kfree(arg->layoutupdate_pages);
    588			return -ENOMEM;
    589		}
    590
    591		goto retry;
    592	}
    593
    594	*start_p = cpu_to_be32(count);
    595	arg->layoutupdate_len = ext_tree_layoutupdate_size(bl, count);
    596
    597	if (unlikely(arg->layoutupdate_pages != &arg->layoutupdate_page)) {
    598		void *p = start_p, *end = p + arg->layoutupdate_len;
    599		struct page *page = NULL;
    600		int i = 0;
    601
    602		arg->start_p = start_p;
    603		for ( ; p < end; p += PAGE_SIZE) {
    604			page = vmalloc_to_page(p);
    605			arg->layoutupdate_pages[i++] = page;
    606			get_page(page);
    607		}
    608	}
    609
    610	dprintk("%s found %zu ranges\n", __func__, count);
    611	return 0;
    612}
    613
    614void
    615ext_tree_mark_committed(struct nfs4_layoutcommit_args *arg, int status)
    616{
    617	struct pnfs_block_layout *bl = BLK_LO2EXT(NFS_I(arg->inode)->layout);
    618	struct rb_root *root = &bl->bl_ext_rw;
    619	struct pnfs_block_extent *be;
    620
    621	dprintk("%s status %d\n", __func__, status);
    622
    623	ext_tree_free_commitdata(arg, arg->layoutupdate_len);
    624
    625	spin_lock(&bl->bl_ext_lock);
    626	for (be = ext_tree_first(root); be; be = ext_tree_next(be)) {
    627		if (be->be_state != PNFS_BLOCK_INVALID_DATA ||
    628		    be->be_tag != EXTENT_COMMITTING)
    629			continue;
    630
    631		if (status) {
    632			/*
    633			 * Mark as written and try again.
    634			 *
    635			 * XXX: some real error handling here wouldn't hurt..
    636			 */
    637			be->be_tag = EXTENT_WRITTEN;
    638		} else {
    639			be->be_state = PNFS_BLOCK_READWRITE_DATA;
    640			be->be_tag = 0;
    641		}
    642
    643		be = ext_try_to_merge_left(root, be);
    644		be = ext_try_to_merge_right(root, be);
    645	}
    646	spin_unlock(&bl->bl_ext_lock);
    647}