cachepc-linux

Fork of AMDESE/linux with modifications for CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux
Log | Files | Refs | README | LICENSE | sfeed.txt

mdt.c (16291B)


      1// SPDX-License-Identifier: GPL-2.0+
      2/*
      3 * Meta data file for NILFS
      4 *
      5 * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation.
      6 *
      7 * Written by Ryusuke Konishi.
      8 */
      9
     10#include <linux/buffer_head.h>
     11#include <linux/mpage.h>
     12#include <linux/mm.h>
     13#include <linux/writeback.h>
     14#include <linux/backing-dev.h>
     15#include <linux/swap.h>
     16#include <linux/slab.h>
     17#include "nilfs.h"
     18#include "btnode.h"
     19#include "segment.h"
     20#include "page.h"
     21#include "mdt.h"
     22#include "alloc.h"		/* nilfs_palloc_destroy_cache() */
     23
     24#include <trace/events/nilfs2.h>
     25
/*
 * Maximum number of blocks submitted as read-ahead after the requested
 * block in nilfs_mdt_read_block().
 */
#define NILFS_MDT_MAX_RA_BLOCKS		(16 - 1)
     27
     28
     29static int
     30nilfs_mdt_insert_new_block(struct inode *inode, unsigned long block,
     31			   struct buffer_head *bh,
     32			   void (*init_block)(struct inode *,
     33					      struct buffer_head *, void *))
     34{
     35	struct nilfs_inode_info *ii = NILFS_I(inode);
     36	void *kaddr;
     37	int ret;
     38
     39	/* Caller exclude read accesses using page lock */
     40
     41	/* set_buffer_new(bh); */
     42	bh->b_blocknr = 0;
     43
     44	ret = nilfs_bmap_insert(ii->i_bmap, block, (unsigned long)bh);
     45	if (unlikely(ret))
     46		return ret;
     47
     48	set_buffer_mapped(bh);
     49
     50	kaddr = kmap_atomic(bh->b_page);
     51	memset(kaddr + bh_offset(bh), 0, i_blocksize(inode));
     52	if (init_block)
     53		init_block(inode, bh, kaddr);
     54	flush_dcache_page(bh->b_page);
     55	kunmap_atomic(kaddr);
     56
     57	set_buffer_uptodate(bh);
     58	mark_buffer_dirty(bh);
     59	nilfs_mdt_mark_dirty(inode);
     60
     61	trace_nilfs2_mdt_insert_new_block(inode, inode->i_ino, block);
     62
     63	return 0;
     64}
     65
/*
 * nilfs_mdt_create_block - create a new block on a metadata file
 * @inode: inode of the metadata file
 * @block: block offset of the block to create
 * @out_bh: receives the buffer head of the new block
 * @init_block: optional initializer passed on to nilfs_mdt_insert_new_block()
 *
 * The whole operation runs between nilfs_transaction_begin() and either
 * nilfs_transaction_commit() (block inserted) or nilfs_transaction_abort()
 * (any failure).  The return value of nilfs_transaction_begin() is not
 * checked here.
 *
 * Return: 0 on success, -ENOMEM if no buffer could be grabbed, -EEXIST if
 * the buffer is already up to date, the error from
 * nilfs_mdt_insert_new_block(), or the error from nilfs_transaction_commit().
 *
 * NOTE(review): *out_bh is assigned (with an extra reference) before the
 * commit; if the commit then fails, that reference is not dropped in this
 * function - confirm whether callers or the commit path account for it.
 */
static int nilfs_mdt_create_block(struct inode *inode, unsigned long block,
				  struct buffer_head **out_bh,
				  void (*init_block)(struct inode *,
						     struct buffer_head *,
						     void *))
{
	struct super_block *sb = inode->i_sb;
	struct nilfs_transaction_info ti;
	struct buffer_head *bh;
	int err;

	nilfs_transaction_begin(sb, &ti, 0);

	err = -ENOMEM;
	bh = nilfs_grab_buffer(inode, inode->i_mapping, block, 0);
	if (unlikely(!bh))
		goto failed_unlock;	/* no buffer: only the transaction to undo */

	/* An up-to-date buffer means the block already exists. */
	err = -EEXIST;
	if (buffer_uptodate(bh))
		goto failed_bh;

	/* Re-check after waiting for the buffer to become unlocked. */
	wait_on_buffer(bh);
	if (buffer_uptodate(bh))
		goto failed_bh;

	bh->b_bdev = sb->s_bdev;
	err = nilfs_mdt_insert_new_block(inode, block, bh, init_block);
	if (likely(!err)) {
		/* Extra reference that is handed over to the caller. */
		get_bh(bh);
		*out_bh = bh;
	}

	/*
	 * Reached on success as well: release the page lock, the page
	 * reference and the buffer reference (presumably all obtained by
	 * nilfs_grab_buffer() - confirm against its definition).
	 */
 failed_bh:
	unlock_page(bh->b_page);
	put_page(bh->b_page);
	brelse(bh);

 failed_unlock:
	if (likely(!err))
		err = nilfs_transaction_commit(sb);
	else
		nilfs_transaction_abort(sb);

	return err;
}
    112
/*
 * nilfs_mdt_submit_block - submit a read request for a metadata file block
 * @inode: inode of the metadata file
 * @blkoff: block offset to read
 * @mode: request operation passed to submit_bh()
 * @mode_flags: request flags passed to submit_bh(); REQ_RAHEAD selects the
 *              non-blocking read-ahead behaviour
 * @out_bh: receives the buffer head, with its own reference, when 0 or
 *          -EEXIST is returned
 *
 * Return: 0 if a read was submitted, -EEXIST (internal code) if the buffer
 * was already up to date, -ENOMEM if no buffer could be grabbed, -EBUSY if
 * this is a read-ahead request and the buffer lock could not be taken
 * immediately, or the error returned by nilfs_bmap_lookup().  @out_bh is
 * left untouched for every return value other than 0 and -EEXIST.
 */
static int
nilfs_mdt_submit_block(struct inode *inode, unsigned long blkoff,
		       int mode, int mode_flags, struct buffer_head **out_bh)
{
	struct buffer_head *bh;
	__u64 blknum = 0;
	int ret = -ENOMEM;

	bh = nilfs_grab_buffer(inode, inode->i_mapping, blkoff, 0);
	if (unlikely(!bh))
		goto failed;

	ret = -EEXIST; /* internal code */
	if (buffer_uptodate(bh))
		goto out;

	if (mode_flags & REQ_RAHEAD) {
		/* Read-ahead must not block on a contended buffer lock. */
		if (!trylock_buffer(bh)) {
			ret = -EBUSY;
			goto failed_bh;
		}
	} else /* mode == READ */
		lock_buffer(bh);

	/* The buffer may have become up to date while we took the lock. */
	if (buffer_uptodate(bh)) {
		unlock_buffer(bh);
		goto out;
	}

	/* Translate the file block offset into a disk block number. */
	ret = nilfs_bmap_lookup(NILFS_I(inode)->i_bmap, blkoff, &blknum);
	if (unlikely(ret)) {
		unlock_buffer(bh);
		goto failed_bh;
	}
	map_bh(bh, inode->i_sb, (sector_t)blknum);

	bh->b_end_io = end_buffer_read_sync;
	/*
	 * Reference held for the duration of the I/O; presumably dropped by
	 * end_buffer_read_sync() on completion - confirm.
	 */
	get_bh(bh);
	submit_bh(mode, mode_flags, bh);
	ret = 0;

	trace_nilfs2_mdt_submit_block(inode, inode->i_ino, blkoff, mode);
 out:
	/* Reference handed over to the caller through @out_bh. */
	get_bh(bh);
	*out_bh = bh;

	/*
	 * Shared exit, also taken on success: release the page lock, the page
	 * reference and the buffer reference (presumably all obtained by
	 * nilfs_grab_buffer() - confirm against its definition).
	 */
 failed_bh:
	unlock_page(bh->b_page);
	put_page(bh->b_page);
	brelse(bh);
 failed:
	return ret;
}
    166
/*
 * nilfs_mdt_read_block - read a metadata file block, optionally with read-ahead
 * @inode: inode of the metadata file
 * @block: block offset to read
 * @readahead: when non-zero, also submit up to NILFS_MDT_MAX_RA_BLOCKS
 *             read-ahead requests for the blocks following @block
 * @out_bh: receives the buffer head of @block when 0 is returned
 *
 * Errors that occur while submitting read-ahead requests only stop the
 * read-ahead; they are never returned to the caller.
 *
 * Return: 0 on success, -EIO if the buffer is not up to date after the read
 * has finished, or the error from nilfs_mdt_submit_block() for @block
 * itself.
 */
static int nilfs_mdt_read_block(struct inode *inode, unsigned long block,
				int readahead, struct buffer_head **out_bh)
{
	struct buffer_head *first_bh, *bh;
	unsigned long blkoff;
	int i, nr_ra_blocks = NILFS_MDT_MAX_RA_BLOCKS;
	int err;

	err = nilfs_mdt_submit_block(inode, block, REQ_OP_READ, 0, &first_bh);
	if (err == -EEXIST) /* internal code: already up to date, no wait needed */
		goto out;

	if (unlikely(err))
		goto failed;

	if (readahead) {
		blkoff = block + 1;
		for (i = 0; i < nr_ra_blocks; i++, blkoff++) {
			err = nilfs_mdt_submit_block(inode, blkoff, REQ_OP_READ,
						     REQ_RAHEAD, &bh);
			/*
			 * 0 / -EEXIST: a buffer reference was returned that is
			 * not needed here.  -EBUSY: skip this block and go on.
			 * Any other error aborts the read-ahead.
			 */
			if (likely(!err || err == -EEXIST))
				brelse(bh);
			else if (err != -EBUSY)
				break;
			/* Stop early once the requested block is unlocked. */
			if (!buffer_locked(first_bh))
				goto out_no_wait;
		}
	}

	wait_on_buffer(first_bh);

 out_no_wait:
	/* Whatever read-ahead left in err is replaced here. */
	err = -EIO;
	if (!buffer_uptodate(first_bh)) {
		nilfs_err(inode->i_sb,
			  "I/O error reading meta-data file (ino=%lu, block-offset=%lu)",
			  inode->i_ino, block);
		goto failed_bh;
	}
 out:
	*out_bh = first_bh;
	return 0;

 failed_bh:
	brelse(first_bh);
 failed:
	return err;
}
    216
    217/**
    218 * nilfs_mdt_get_block - read or create a buffer on meta data file.
    219 * @inode: inode of the meta data file
    220 * @blkoff: block offset
    221 * @create: create flag
    222 * @init_block: initializer used for newly allocated block
    223 * @out_bh: output of a pointer to the buffer_head
    224 *
    225 * nilfs_mdt_get_block() looks up the specified buffer and tries to create
    226 * a new buffer if @create is not zero.  On success, the returned buffer is
    227 * assured to be either existing or formatted using a buffer lock on success.
    228 * @out_bh is substituted only when zero is returned.
    229 *
    230 * Return Value: On success, it returns 0. On error, the following negative
    231 * error code is returned.
    232 *
    233 * %-ENOMEM - Insufficient memory available.
    234 *
    235 * %-EIO - I/O error
    236 *
    237 * %-ENOENT - the specified block does not exist (hole block)
    238 *
    239 * %-EROFS - Read only filesystem (for create mode)
    240 */
    241int nilfs_mdt_get_block(struct inode *inode, unsigned long blkoff, int create,
    242			void (*init_block)(struct inode *,
    243					   struct buffer_head *, void *),
    244			struct buffer_head **out_bh)
    245{
    246	int ret;
    247
    248	/* Should be rewritten with merging nilfs_mdt_read_block() */
    249 retry:
    250	ret = nilfs_mdt_read_block(inode, blkoff, !create, out_bh);
    251	if (!create || ret != -ENOENT)
    252		return ret;
    253
    254	ret = nilfs_mdt_create_block(inode, blkoff, out_bh, init_block);
    255	if (unlikely(ret == -EEXIST)) {
    256		/* create = 0; */  /* limit read-create loop retries */
    257		goto retry;
    258	}
    259	return ret;
    260}
    261
    262/**
    263 * nilfs_mdt_find_block - find and get a buffer on meta data file.
    264 * @inode: inode of the meta data file
    265 * @start: start block offset (inclusive)
    266 * @end: end block offset (inclusive)
    267 * @blkoff: block offset
    268 * @out_bh: place to store a pointer to buffer_head struct
    269 *
    270 * nilfs_mdt_find_block() looks up an existing block in range of
    271 * [@start, @end] and stores pointer to a buffer head of the block to
    272 * @out_bh, and block offset to @blkoff, respectively.  @out_bh and
    273 * @blkoff are substituted only when zero is returned.
    274 *
    275 * Return Value: On success, it returns 0. On error, the following negative
    276 * error code is returned.
    277 *
    278 * %-ENOMEM - Insufficient memory available.
    279 *
    280 * %-EIO - I/O error
    281 *
    282 * %-ENOENT - no block was found in the range
    283 */
    284int nilfs_mdt_find_block(struct inode *inode, unsigned long start,
    285			 unsigned long end, unsigned long *blkoff,
    286			 struct buffer_head **out_bh)
    287{
    288	__u64 next;
    289	int ret;
    290
    291	if (unlikely(start > end))
    292		return -ENOENT;
    293
    294	ret = nilfs_mdt_read_block(inode, start, true, out_bh);
    295	if (!ret) {
    296		*blkoff = start;
    297		goto out;
    298	}
    299	if (unlikely(ret != -ENOENT || start == ULONG_MAX))
    300		goto out;
    301
    302	ret = nilfs_bmap_seek_key(NILFS_I(inode)->i_bmap, start + 1, &next);
    303	if (!ret) {
    304		if (next <= end) {
    305			ret = nilfs_mdt_read_block(inode, next, true, out_bh);
    306			if (!ret)
    307				*blkoff = next;
    308		} else {
    309			ret = -ENOENT;
    310		}
    311	}
    312out:
    313	return ret;
    314}
    315
    316/**
    317 * nilfs_mdt_delete_block - make a hole on the meta data file.
    318 * @inode: inode of the meta data file
    319 * @block: block offset
    320 *
    321 * Return Value: On success, zero is returned.
    322 * On error, one of the following negative error code is returned.
    323 *
    324 * %-ENOMEM - Insufficient memory available.
    325 *
    326 * %-EIO - I/O error
    327 */
    328int nilfs_mdt_delete_block(struct inode *inode, unsigned long block)
    329{
    330	struct nilfs_inode_info *ii = NILFS_I(inode);
    331	int err;
    332
    333	err = nilfs_bmap_delete(ii->i_bmap, block);
    334	if (!err || err == -ENOENT) {
    335		nilfs_mdt_mark_dirty(inode);
    336		nilfs_mdt_forget_block(inode, block);
    337	}
    338	return err;
    339}
    340
    341/**
    342 * nilfs_mdt_forget_block - discard dirty state and try to remove the page
    343 * @inode: inode of the meta data file
    344 * @block: block offset
    345 *
    346 * nilfs_mdt_forget_block() clears a dirty flag of the specified buffer, and
    347 * tries to release the page including the buffer from a page cache.
    348 *
    349 * Return Value: On success, 0 is returned. On error, one of the following
    350 * negative error code is returned.
    351 *
    352 * %-EBUSY - page has an active buffer.
    353 *
    354 * %-ENOENT - page cache has no page addressed by the offset.
    355 */
    356int nilfs_mdt_forget_block(struct inode *inode, unsigned long block)
    357{
    358	pgoff_t index = (pgoff_t)block >>
    359		(PAGE_SHIFT - inode->i_blkbits);
    360	struct page *page;
    361	unsigned long first_block;
    362	int ret = 0;
    363	int still_dirty;
    364
    365	page = find_lock_page(inode->i_mapping, index);
    366	if (!page)
    367		return -ENOENT;
    368
    369	wait_on_page_writeback(page);
    370
    371	first_block = (unsigned long)index <<
    372		(PAGE_SHIFT - inode->i_blkbits);
    373	if (page_has_buffers(page)) {
    374		struct buffer_head *bh;
    375
    376		bh = nilfs_page_get_nth_block(page, block - first_block);
    377		nilfs_forget_buffer(bh);
    378	}
    379	still_dirty = PageDirty(page);
    380	unlock_page(page);
    381	put_page(page);
    382
    383	if (still_dirty ||
    384	    invalidate_inode_pages2_range(inode->i_mapping, index, index) != 0)
    385		ret = -EBUSY;
    386	return ret;
    387}
    388
    389int nilfs_mdt_fetch_dirty(struct inode *inode)
    390{
    391	struct nilfs_inode_info *ii = NILFS_I(inode);
    392
    393	if (nilfs_bmap_test_and_clear_dirty(ii->i_bmap)) {
    394		set_bit(NILFS_I_DIRTY, &ii->i_state);
    395		return 1;
    396	}
    397	return test_bit(NILFS_I_DIRTY, &ii->i_state);
    398}
    399
    400static int
    401nilfs_mdt_write_page(struct page *page, struct writeback_control *wbc)
    402{
    403	struct inode *inode = page->mapping->host;
    404	struct super_block *sb;
    405	int err = 0;
    406
    407	if (inode && sb_rdonly(inode->i_sb)) {
    408		/*
    409		 * It means that filesystem was remounted in read-only
    410		 * mode because of error or metadata corruption. But we
    411		 * have dirty pages that try to be flushed in background.
    412		 * So, here we simply discard this dirty page.
    413		 */
    414		nilfs_clear_dirty_page(page, false);
    415		unlock_page(page);
    416		return -EROFS;
    417	}
    418
    419	redirty_page_for_writepage(wbc, page);
    420	unlock_page(page);
    421
    422	if (!inode)
    423		return 0;
    424
    425	sb = inode->i_sb;
    426
    427	if (wbc->sync_mode == WB_SYNC_ALL)
    428		err = nilfs_construct_segment(sb);
    429	else if (wbc->for_reclaim)
    430		nilfs_flush_segment(sb, inode->i_ino);
    431
    432	return err;
    433}
    434
    435
/*
 * Address space operations installed on the mapping of a metadata file by
 * nilfs_mdt_init().  Only dirtying, invalidation and writepage are set.
 */
static const struct address_space_operations def_mdt_aops = {
	.dirty_folio		= block_dirty_folio,
	.invalidate_folio	= block_invalidate_folio,
	.writepage		= nilfs_mdt_write_page,
};

/*
 * Inode and file operation tables installed by nilfs_mdt_init().  They are
 * defined without an initializer, so every method pointer in them is NULL.
 */
static const struct inode_operations def_mdt_iops;
static const struct file_operations def_mdt_fops;
    444
    445
    446int nilfs_mdt_init(struct inode *inode, gfp_t gfp_mask, size_t objsz)
    447{
    448	struct nilfs_mdt_info *mi;
    449
    450	mi = kzalloc(max(sizeof(*mi), objsz), GFP_NOFS);
    451	if (!mi)
    452		return -ENOMEM;
    453
    454	init_rwsem(&mi->mi_sem);
    455	inode->i_private = mi;
    456
    457	inode->i_mode = S_IFREG;
    458	mapping_set_gfp_mask(inode->i_mapping, gfp_mask);
    459
    460	inode->i_op = &def_mdt_iops;
    461	inode->i_fop = &def_mdt_fops;
    462	inode->i_mapping->a_ops = &def_mdt_aops;
    463
    464	return 0;
    465}
    466
    467/**
    468 * nilfs_mdt_clear - do cleanup for the metadata file
    469 * @inode: inode of the metadata file
    470 */
    471void nilfs_mdt_clear(struct inode *inode)
    472{
    473	struct nilfs_mdt_info *mdi = NILFS_MDT(inode);
    474	struct nilfs_shadow_map *shadow = mdi->mi_shadow;
    475
    476	if (mdi->mi_palloc_cache)
    477		nilfs_palloc_destroy_cache(inode);
    478
    479	if (shadow) {
    480		struct inode *s_inode = shadow->inode;
    481
    482		shadow->inode = NULL;
    483		iput(s_inode);
    484		mdi->mi_shadow = NULL;
    485	}
    486}
    487
    488/**
    489 * nilfs_mdt_destroy - release resources used by the metadata file
    490 * @inode: inode of the metadata file
    491 */
    492void nilfs_mdt_destroy(struct inode *inode)
    493{
    494	struct nilfs_mdt_info *mdi = NILFS_MDT(inode);
    495
    496	kfree(mdi->mi_bgl); /* kfree(NULL) is safe */
    497	kfree(mdi);
    498}
    499
    500void nilfs_mdt_set_entry_size(struct inode *inode, unsigned int entry_size,
    501			      unsigned int header_size)
    502{
    503	struct nilfs_mdt_info *mi = NILFS_MDT(inode);
    504
    505	mi->mi_entry_size = entry_size;
    506	mi->mi_entries_per_block = i_blocksize(inode) / entry_size;
    507	mi->mi_first_entry_offset = DIV_ROUND_UP(header_size, entry_size);
    508}
    509
    510/**
    511 * nilfs_mdt_setup_shadow_map - setup shadow map and bind it to metadata file
    512 * @inode: inode of the metadata file
    513 * @shadow: shadow mapping
    514 */
    515int nilfs_mdt_setup_shadow_map(struct inode *inode,
    516			       struct nilfs_shadow_map *shadow)
    517{
    518	struct nilfs_mdt_info *mi = NILFS_MDT(inode);
    519	struct inode *s_inode;
    520
    521	INIT_LIST_HEAD(&shadow->frozen_buffers);
    522
    523	s_inode = nilfs_iget_for_shadow(inode);
    524	if (IS_ERR(s_inode))
    525		return PTR_ERR(s_inode);
    526
    527	shadow->inode = s_inode;
    528	mi->mi_shadow = shadow;
    529	return 0;
    530}
    531
    532/**
    533 * nilfs_mdt_save_to_shadow_map - copy bmap and dirty pages to shadow map
    534 * @inode: inode of the metadata file
    535 */
    536int nilfs_mdt_save_to_shadow_map(struct inode *inode)
    537{
    538	struct nilfs_mdt_info *mi = NILFS_MDT(inode);
    539	struct nilfs_inode_info *ii = NILFS_I(inode);
    540	struct nilfs_shadow_map *shadow = mi->mi_shadow;
    541	struct inode *s_inode = shadow->inode;
    542	int ret;
    543
    544	ret = nilfs_copy_dirty_pages(s_inode->i_mapping, inode->i_mapping);
    545	if (ret)
    546		goto out;
    547
    548	ret = nilfs_copy_dirty_pages(NILFS_I(s_inode)->i_assoc_inode->i_mapping,
    549				     ii->i_assoc_inode->i_mapping);
    550	if (ret)
    551		goto out;
    552
    553	nilfs_bmap_save(ii->i_bmap, &shadow->bmap_store);
    554 out:
    555	return ret;
    556}
    557
    558int nilfs_mdt_freeze_buffer(struct inode *inode, struct buffer_head *bh)
    559{
    560	struct nilfs_shadow_map *shadow = NILFS_MDT(inode)->mi_shadow;
    561	struct buffer_head *bh_frozen;
    562	struct page *page;
    563	int blkbits = inode->i_blkbits;
    564
    565	page = grab_cache_page(shadow->inode->i_mapping, bh->b_page->index);
    566	if (!page)
    567		return -ENOMEM;
    568
    569	if (!page_has_buffers(page))
    570		create_empty_buffers(page, 1 << blkbits, 0);
    571
    572	bh_frozen = nilfs_page_get_nth_block(page, bh_offset(bh) >> blkbits);
    573
    574	if (!buffer_uptodate(bh_frozen))
    575		nilfs_copy_buffer(bh_frozen, bh);
    576	if (list_empty(&bh_frozen->b_assoc_buffers)) {
    577		list_add_tail(&bh_frozen->b_assoc_buffers,
    578			      &shadow->frozen_buffers);
    579		set_buffer_nilfs_redirected(bh);
    580	} else {
    581		brelse(bh_frozen); /* already frozen */
    582	}
    583
    584	unlock_page(page);
    585	put_page(page);
    586	return 0;
    587}
    588
    589struct buffer_head *
    590nilfs_mdt_get_frozen_buffer(struct inode *inode, struct buffer_head *bh)
    591{
    592	struct nilfs_shadow_map *shadow = NILFS_MDT(inode)->mi_shadow;
    593	struct buffer_head *bh_frozen = NULL;
    594	struct page *page;
    595	int n;
    596
    597	page = find_lock_page(shadow->inode->i_mapping, bh->b_page->index);
    598	if (page) {
    599		if (page_has_buffers(page)) {
    600			n = bh_offset(bh) >> inode->i_blkbits;
    601			bh_frozen = nilfs_page_get_nth_block(page, n);
    602		}
    603		unlock_page(page);
    604		put_page(page);
    605	}
    606	return bh_frozen;
    607}
    608
    609static void nilfs_release_frozen_buffers(struct nilfs_shadow_map *shadow)
    610{
    611	struct list_head *head = &shadow->frozen_buffers;
    612	struct buffer_head *bh;
    613
    614	while (!list_empty(head)) {
    615		bh = list_first_entry(head, struct buffer_head,
    616				      b_assoc_buffers);
    617		list_del_init(&bh->b_assoc_buffers);
    618		brelse(bh); /* drop ref-count to make it releasable */
    619	}
    620}
    621
/**
 * nilfs_mdt_restore_from_shadow_map - restore dirty pages and bmap state
 * @inode: inode of the metadata file
 *
 * With mi_sem held for writing, the persistent allocator cache (if any) is
 * cleared, the dirty pages of the metadata file and of its associated inode
 * are dropped and replaced by the pages kept in the shadow map, and the
 * bmap is restored from the state stored in the shadow map.
 */
void nilfs_mdt_restore_from_shadow_map(struct inode *inode)
{
	struct nilfs_mdt_info *mi = NILFS_MDT(inode);
	struct nilfs_inode_info *ii = NILFS_I(inode);
	struct nilfs_shadow_map *shadow = mi->mi_shadow;

	down_write(&mi->mi_sem);

	if (mi->mi_palloc_cache)
		nilfs_palloc_clear_cache(inode);

	/* Pages of the metadata file itself. */
	nilfs_clear_dirty_pages(inode->i_mapping, true);
	nilfs_copy_back_pages(inode->i_mapping, shadow->inode->i_mapping);

	/* Pages of the inode associated with the metadata file. */
	nilfs_clear_dirty_pages(ii->i_assoc_inode->i_mapping, true);
	nilfs_copy_back_pages(ii->i_assoc_inode->i_mapping,
			      NILFS_I(shadow->inode)->i_assoc_inode->i_mapping);

	nilfs_bmap_restore(ii->i_bmap, &shadow->bmap_store);

	up_write(&mi->mi_sem);
}
    648
    649/**
    650 * nilfs_mdt_clear_shadow_map - truncate pages in shadow map caches
    651 * @inode: inode of the metadata file
    652 */
    653void nilfs_mdt_clear_shadow_map(struct inode *inode)
    654{
    655	struct nilfs_mdt_info *mi = NILFS_MDT(inode);
    656	struct nilfs_shadow_map *shadow = mi->mi_shadow;
    657	struct inode *shadow_btnc_inode = NILFS_I(shadow->inode)->i_assoc_inode;
    658
    659	down_write(&mi->mi_sem);
    660	nilfs_release_frozen_buffers(shadow);
    661	truncate_inode_pages(shadow->inode->i_mapping, 0);
    662	truncate_inode_pages(shadow_btnc_inode->i_mapping, 0);
    663	up_write(&mi->mi_sem);
    664}