cachepc-linux

Fork of AMDESE/linux with modifications for the CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux

file.c (48005B)


      1// SPDX-License-Identifier: GPL-2.0-only
      2/*
      3 * This file is part of UBIFS.
      4 *
      5 * Copyright (C) 2006-2008 Nokia Corporation.
      6 *
      7 * Authors: Artem Bityutskiy (Битюцкий Артём)
      8 *          Adrian Hunter
      9 */
     10
     11/*
     12 * This file implements VFS file and inode operations for regular files, device
     13 * nodes and symlinks as well as address space operations.
     14 *
     15 * UBIFS uses 2 page flags: @PG_private and @PG_checked. @PG_private is set if
     16 * the page is dirty and is used for optimization purposes - dirty pages are
     17 * not budgeted so the flag shows that 'ubifs_write_end()' should not release
     18 * the budget for this page. The @PG_checked flag is set if full budgeting is
     19 * required for the page e.g., when it corresponds to a file hole or it is
     20 * beyond the file size. The budgeting is done in 'ubifs_write_begin()', because
     21 * it is OK to fail in this function, and the budget is released in
     22 * 'ubifs_write_end()'. So the @PG_private and @PG_checked flags carry
     23 * information about how the page was budgeted, to make it possible to release
     24 * the budget properly.
     25 *
     26 * A thing to keep in mind: inode @i_mutex is locked in most VFS operations we
     27 * implement. However, this is not true for 'ubifs_writepage()', which may be
     28 * called with @i_mutex unlocked. For example, when the flusher thread is doing
     29 * background write-back, it calls 'ubifs_writepage()' with unlocked @i_mutex.
     30 * At "normal" work-paths the @i_mutex is locked in 'ubifs_writepage()', e.g.
     31 * in the "sys_write -> alloc_pages -> direct reclaim path". So, in
     32 * 'ubifs_writepage()' we are only guaranteed that the page is locked.
     33 *
     34 * Similarly, @i_mutex is not always locked in 'ubifs_read_folio()', e.g., the
     35 * read-ahead path does not lock it ("sys_read -> generic_file_aio_read ->
     36 * ondemand_readahead -> read_folio"). In case of readahead, @I_SYNC flag is not
     37 * set as well. However, UBIFS disables readahead.
     38 */
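/*
 * Editor's note: the snippet below is an illustrative userspace sketch of the
 * flag/budget relationship described in the comment above; it is not part of
 * UBIFS, and all names (page_model, budget_kind, ...) are invented here.
 * PG_private stands for "already budgeted", PG_checked for "needs the full
 * new-page budget".
 */
#include <stdbool.h>
#include <stdio.h>

enum budget_kind { BUDGET_NONE, BUDGET_NEW_PAGE, BUDGET_EXISTING_PAGE };

struct page_model {
	bool private_flag;	/* models PG_private: page is dirty and budgeted */
	bool checked_flag;	/* models PG_checked: hole / beyond file size */
};

/* Which budget a write path would still have to acquire for this page. */
static enum budget_kind budget_to_acquire(const struct page_model *p)
{
	if (p->private_flag)
		return BUDGET_NONE;		/* already paid for when dirtied */
	return p->checked_flag ? BUDGET_NEW_PAGE : BUDGET_EXISTING_PAGE;
}

int main(void)
{
	struct page_model hole_page  = { .checked_flag = true };
	struct page_model dirty_page = { .private_flag = true };

	printf("hole page -> %d, dirty page -> %d\n",
	       budget_to_acquire(&hole_page), budget_to_acquire(&dirty_page));
	return 0;
}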
     39
     40#include "ubifs.h"
     41#include <linux/mount.h>
     42#include <linux/slab.h>
     43#include <linux/migrate.h>
     44
     45static int read_block(struct inode *inode, void *addr, unsigned int block,
     46		      struct ubifs_data_node *dn)
     47{
     48	struct ubifs_info *c = inode->i_sb->s_fs_info;
     49	int err, len, out_len;
     50	union ubifs_key key;
     51	unsigned int dlen;
     52
     53	data_key_init(c, &key, inode->i_ino, block);
     54	err = ubifs_tnc_lookup(c, &key, dn);
     55	if (err) {
     56		if (err == -ENOENT)
     57			/* Not found, so it must be a hole */
     58			memset(addr, 0, UBIFS_BLOCK_SIZE);
     59		return err;
     60	}
     61
     62	ubifs_assert(c, le64_to_cpu(dn->ch.sqnum) >
     63		     ubifs_inode(inode)->creat_sqnum);
     64	len = le32_to_cpu(dn->size);
     65	if (len <= 0 || len > UBIFS_BLOCK_SIZE)
     66		goto dump;
     67
     68	dlen = le32_to_cpu(dn->ch.len) - UBIFS_DATA_NODE_SZ;
     69
     70	if (IS_ENCRYPTED(inode)) {
     71		err = ubifs_decrypt(inode, dn, &dlen, block);
     72		if (err)
     73			goto dump;
     74	}
     75
     76	out_len = UBIFS_BLOCK_SIZE;
     77	err = ubifs_decompress(c, &dn->data, dlen, addr, &out_len,
     78			       le16_to_cpu(dn->compr_type));
     79	if (err || len != out_len)
     80		goto dump;
     81
     82	/*
     83	 * Data length can be less than a full block, even for blocks that are
     84	 * not the last in the file (e.g., as a result of making a hole and
     85	 * appending data). Ensure that the remainder is zeroed out.
     86	 */
     87	if (len < UBIFS_BLOCK_SIZE)
     88		memset(addr + len, 0, UBIFS_BLOCK_SIZE - len);
     89
     90	return 0;
     91
     92dump:
     93	ubifs_err(c, "bad data node (block %u, inode %lu)",
     94		  block, inode->i_ino);
     95	ubifs_dump_node(c, dn, UBIFS_MAX_DATA_NODE_SZ);
     96	return -EINVAL;
     97}
     98
     99static int do_readpage(struct page *page)
    100{
    101	void *addr;
    102	int err = 0, i;
    103	unsigned int block, beyond;
    104	struct ubifs_data_node *dn;
    105	struct inode *inode = page->mapping->host;
    106	struct ubifs_info *c = inode->i_sb->s_fs_info;
    107	loff_t i_size = i_size_read(inode);
    108
    109	dbg_gen("ino %lu, pg %lu, i_size %lld, flags %#lx",
    110		inode->i_ino, page->index, i_size, page->flags);
    111	ubifs_assert(c, !PageChecked(page));
    112	ubifs_assert(c, !PagePrivate(page));
    113
    114	addr = kmap(page);
    115
    116	block = page->index << UBIFS_BLOCKS_PER_PAGE_SHIFT;
    117	beyond = (i_size + UBIFS_BLOCK_SIZE - 1) >> UBIFS_BLOCK_SHIFT;
    118	if (block >= beyond) {
    119		/* Reading beyond inode */
    120		SetPageChecked(page);
    121		memset(addr, 0, PAGE_SIZE);
    122		goto out;
    123	}
    124
    125	dn = kmalloc(UBIFS_MAX_DATA_NODE_SZ, GFP_NOFS);
    126	if (!dn) {
    127		err = -ENOMEM;
    128		goto error;
    129	}
    130
    131	i = 0;
    132	while (1) {
    133		int ret;
    134
    135		if (block >= beyond) {
    136			/* Reading beyond inode */
    137			err = -ENOENT;
    138			memset(addr, 0, UBIFS_BLOCK_SIZE);
    139		} else {
    140			ret = read_block(inode, addr, block, dn);
    141			if (ret) {
    142				err = ret;
    143				if (err != -ENOENT)
    144					break;
    145			} else if (block + 1 == beyond) {
    146				int dlen = le32_to_cpu(dn->size);
    147				int ilen = i_size & (UBIFS_BLOCK_SIZE - 1);
    148
    149				if (ilen && ilen < dlen)
    150					memset(addr + ilen, 0, dlen - ilen);
    151			}
    152		}
    153		if (++i >= UBIFS_BLOCKS_PER_PAGE)
    154			break;
    155		block += 1;
    156		addr += UBIFS_BLOCK_SIZE;
    157	}
    158	if (err) {
    159		struct ubifs_info *c = inode->i_sb->s_fs_info;
    160		if (err == -ENOENT) {
    161			/* Not found, so it must be a hole */
    162			SetPageChecked(page);
    163			dbg_gen("hole");
    164			goto out_free;
    165		}
    166		ubifs_err(c, "cannot read page %lu of inode %lu, error %d",
    167			  page->index, inode->i_ino, err);
    168		goto error;
    169	}
    170
    171out_free:
    172	kfree(dn);
    173out:
    174	SetPageUptodate(page);
    175	ClearPageError(page);
    176	flush_dcache_page(page);
    177	kunmap(page);
    178	return 0;
    179
    180error:
    181	kfree(dn);
    182	ClearPageUptodate(page);
    183	SetPageError(page);
    184	flush_dcache_page(page);
    185	kunmap(page);
    186	return err;
    187}
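/*
 * Editor's note: illustrative userspace sketch of the page -> block arithmetic
 * used by do_readpage() above.  The constants mirror the common mainline
 * values (4 KiB UBIFS blocks and 4 KiB pages) but are assumptions of this
 * example rather than taken from this file.
 */
#include <stdio.h>

#define EX_UBIFS_BLOCK_SHIFT		12
#define EX_UBIFS_BLOCK_SIZE		(1 << EX_UBIFS_BLOCK_SHIFT)
#define EX_PAGE_SHIFT			12
#define EX_BLOCKS_PER_PAGE_SHIFT	(EX_PAGE_SHIFT - EX_UBIFS_BLOCK_SHIFT)

int main(void)
{
	unsigned long page_index = 3;
	long long i_size = 10000;	/* example file size in bytes */

	/* First data block backing this page. */
	unsigned int block = page_index << EX_BLOCKS_PER_PAGE_SHIFT;
	/* First block index past the end of the file (rounded up). */
	unsigned int beyond = (i_size + EX_UBIFS_BLOCK_SIZE - 1) >> EX_UBIFS_BLOCK_SHIFT;

	if (block >= beyond)
		printf("page %lu lies entirely beyond i_size: zero-fill it\n",
		       page_index);
	else
		printf("page %lu starts at block %u (file has %u blocks)\n",
		       page_index, block, beyond);
	return 0;
}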
    188
    189/**
    190 * release_new_page_budget - release budget of a new page.
    191 * @c: UBIFS file-system description object
    192 *
    193 * This is a helper function which releases the budget corresponding to one
    194 * new page of data.
    195 */
    196static void release_new_page_budget(struct ubifs_info *c)
    197{
    198	struct ubifs_budget_req req = { .recalculate = 1, .new_page = 1 };
    199
    200	ubifs_release_budget(c, &req);
    201}
    202
    203/**
    204 * release_existing_page_budget - release budget of an existing page.
    205 * @c: UBIFS file-system description object
    206 *
    207 * This is a helper function which releases the budget corresponding to
    208 * changing one page of data which already exists on the flash media.
    209 */
    210static void release_existing_page_budget(struct ubifs_info *c)
    211{
    212	struct ubifs_budget_req req = { .dd_growth = c->bi.page_budget};
    213
    214	ubifs_release_budget(c, &req);
    215}
    216
    217static int write_begin_slow(struct address_space *mapping,
    218			    loff_t pos, unsigned len, struct page **pagep)
    219{
    220	struct inode *inode = mapping->host;
    221	struct ubifs_info *c = inode->i_sb->s_fs_info;
    222	pgoff_t index = pos >> PAGE_SHIFT;
    223	struct ubifs_budget_req req = { .new_page = 1 };
    224	int err, appending = !!(pos + len > inode->i_size);
    225	struct page *page;
    226
    227	dbg_gen("ino %lu, pos %llu, len %u, i_size %lld",
    228		inode->i_ino, pos, len, inode->i_size);
    229
    230	/*
    231	 * At the slow path we have to budget before locking the page, because
    232	 * budgeting may force write-back, which would wait on locked pages and
    233	 * deadlock if we had the page locked. At this point we do not know
    234	 * anything about the page, so assume that this is a new page which is
    235	 * written to a hole. This corresponds to the largest budget. Later the
    236	 * budget will be amended if this is not true.
    237	 */
    238	if (appending)
    239		/* We are appending data, budget for inode change */
    240		req.dirtied_ino = 1;
    241
    242	err = ubifs_budget_space(c, &req);
    243	if (unlikely(err))
    244		return err;
    245
    246	page = grab_cache_page_write_begin(mapping, index);
    247	if (unlikely(!page)) {
    248		ubifs_release_budget(c, &req);
    249		return -ENOMEM;
    250	}
    251
    252	if (!PageUptodate(page)) {
    253		if (!(pos & ~PAGE_MASK) && len == PAGE_SIZE)
    254			SetPageChecked(page);
    255		else {
    256			err = do_readpage(page);
    257			if (err) {
    258				unlock_page(page);
    259				put_page(page);
    260				ubifs_release_budget(c, &req);
    261				return err;
    262			}
    263		}
    264
    265		SetPageUptodate(page);
    266		ClearPageError(page);
    267	}
    268
    269	if (PagePrivate(page))
    270		/*
    271		 * The page is dirty, which means it was budgeted twice:
    272		 *   o first time the budget was allocated by the task which
    273		 *     made the page dirty and set the PG_private flag;
    274		 *   o and then we budgeted for it for the second time at the
    275		 *     very beginning of this function.
    276		 *
    277		 * So what we have to do is to release the page budget we
    278		 * allocated.
    279		 */
    280		release_new_page_budget(c);
    281	else if (!PageChecked(page))
    282		/*
    283		 * We are changing a page which already exists on the media.
    284		 * This means that changing the page does not make the amount
    285		 * of indexing information larger, and this part of the budget
    286		 * which we have already acquired may be released.
    287		 */
    288		ubifs_convert_page_budget(c);
    289
    290	if (appending) {
    291		struct ubifs_inode *ui = ubifs_inode(inode);
    292
    293		/*
    294		 * 'ubifs_write_end()' is optimized from the fast-path part of
    295		 * 'ubifs_write_begin()' and expects the @ui_mutex to be locked
    296		 * if data is appended.
    297		 */
    298		mutex_lock(&ui->ui_mutex);
    299		if (ui->dirty)
    300			/*
    301			 * The inode is dirty already, so we may free the
    302			 * budget we allocated.
    303			 */
    304			ubifs_release_dirty_inode_budget(c, ui);
    305	}
    306
    307	*pagep = page;
    308	return 0;
    309}
    310
    311/**
    312 * allocate_budget - allocate budget for 'ubifs_write_begin()'.
    313 * @c: UBIFS file-system description object
    314 * @page: page to allocate budget for
    315 * @ui: UBIFS inode object the page belongs to
    316 * @appending: non-zero if the page is appended
    317 *
    318 * This is a helper function for 'ubifs_write_begin()' which allocates budget
    319 * for the operation. The budget is allocated differently depending on whether
    320 * this is appending, whether the page is dirty or not, and so on. This
    321 * function leaves the @ui->ui_mutex locked in case of appending. Returns zero
    322 * in case of success and %-ENOSPC in case of failure.
    323 */
    324static int allocate_budget(struct ubifs_info *c, struct page *page,
    325			   struct ubifs_inode *ui, int appending)
    326{
    327	struct ubifs_budget_req req = { .fast = 1 };
    328
    329	if (PagePrivate(page)) {
    330		if (!appending)
    331			/*
    332			 * The page is dirty and we are not appending, which
    333			 * means no budget is needed at all.
    334			 */
    335			return 0;
    336
    337		mutex_lock(&ui->ui_mutex);
    338		if (ui->dirty)
    339			/*
    340			 * The page is dirty and we are appending, so the inode
    341			 * has to be marked as dirty. However, it is already
    342			 * dirty, so we do not need any budget. We may return,
    343			 * but @ui->ui_mutex has to be left locked because we
    344			 * should prevent write-back from flushing the inode
    345			 * and freeing the budget. The lock will be released in
    346			 * 'ubifs_write_end()'.
    347			 */
    348			return 0;
    349
    350		/*
    351		 * The page is dirty, we are appending, the inode is clean, so
    352		 * we need to budget the inode change.
    353		 */
    354		req.dirtied_ino = 1;
    355	} else {
    356		if (PageChecked(page))
    357			/*
    358			 * The page corresponds to a hole and does not
    359			 * exist on the media. So changing it makes
    360			 * the amount of indexing information
    361			 * larger, and we have to budget for a new
    362			 * page.
    363			 */
    364			req.new_page = 1;
    365		else
    366			/*
    367			 * Not a hole, the change will not add any new
    368			 * indexing information, budget for page
    369			 * change.
    370			 */
    371			req.dirtied_page = 1;
    372
    373		if (appending) {
    374			mutex_lock(&ui->ui_mutex);
    375			if (!ui->dirty)
    376				/*
    377				 * The inode is clean but we will have to mark
    378				 * it as dirty because we are appending. This
    379				 * needs a budget.
    380				 */
    381				req.dirtied_ino = 1;
    382		}
    383	}
    384
    385	return ubifs_budget_space(c, &req);
    386}
    387
    388/*
    389 * This function is called when a page of data is going to be written. Since
    390 * the page of data will not necessarily go to the flash straight away, UBIFS
    391 * has to reserve space on the media for it, which is done by means of
    392 * budgeting.
    393 *
    394 * This is the hot-path of the file-system and we are trying to optimize it as
    395 * much as possible. For this reason it is split into 2 parts - slow and fast.
    396 *
    397 * There are many budgeting cases:
    398 *     o a new page is appended - we have to budget for a new page and for
    399 *       changing the inode; however, if the inode is already dirty, there is
    400 *       no need to budget for it;
    401 *     o an existing clean page is changed - we have to budget for it; if the page
    402 *       does not exist on the media (a hole), we have to budget for a new
    403 *       page; otherwise, we may budget for changing an existing page; the
    404 *       difference between these cases is that changing an existing page does
    405 *       not introduce anything new to the FS indexing information, so it does
    406 *       not grow, and smaller budget is acquired in this case;
    407 *     o an existing dirty page is changed - no need to budget at all, because
    408 *       the page budget has been acquired earlier, when the page has been
    409 *       marked dirty.
    410 *
    411 * UBIFS budgeting sub-system may force write-back if it thinks there is no
    412 * space to reserve. This imposes some locking restrictions which make it
    413 * impossible to take the above cases into account and to optimize the
    414 * budgeting accordingly.
    415 *
    416 * The solution for this is that the fast path of 'ubifs_write_begin()' assumes
    417 * there is plenty of flash space and the budget will be acquired quickly,
    418 * without forcing write-back. The slow path does not make this assumption.
    419 */
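/*
 * Editor's note: a minimal userspace model of the fast/slow split described
 * above.  budget_fast() stands in for a budget request with the @fast flag set
 * (it reports -ENOSPC instead of forcing write-back); all names are invented
 * for illustration and this is not UBIFS code.
 */
#include <errno.h>
#include <stdio.h>

static int budget_fast(int free_space)
{
	return free_space > 0 ? 0 : -ENOSPC;	/* never forces write-back */
}

static int budget_slow(void)
{
	/* Slow path: may force write-back, so it runs *before* locking the page. */
	printf("slow path: budget first, then lock the page\n");
	return 0;
}

static int write_begin_model(int free_space)
{
	printf("fast path: lock the page, then try to budget\n");
	if (budget_fast(free_space) == -ENOSPC) {
		printf("fast budgeting failed: unlock the page and fall back\n");
		return budget_slow();
	}
	return 0;
}

int main(void)
{
	write_begin_model(1);	/* plenty of space: the fast path wins */
	write_begin_model(0);	/* no space: fall back to the slow path */
	return 0;
}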
    420static int ubifs_write_begin(struct file *file, struct address_space *mapping,
    421			     loff_t pos, unsigned len,
    422			     struct page **pagep, void **fsdata)
    423{
    424	struct inode *inode = mapping->host;
    425	struct ubifs_info *c = inode->i_sb->s_fs_info;
    426	struct ubifs_inode *ui = ubifs_inode(inode);
    427	pgoff_t index = pos >> PAGE_SHIFT;
    428	int err, appending = !!(pos + len > inode->i_size);
    429	int skipped_read = 0;
    430	struct page *page;
    431
    432	ubifs_assert(c, ubifs_inode(inode)->ui_size == inode->i_size);
    433	ubifs_assert(c, !c->ro_media && !c->ro_mount);
    434
    435	if (unlikely(c->ro_error))
    436		return -EROFS;
    437
    438	/* Try out the fast-path part first */
    439	page = grab_cache_page_write_begin(mapping, index);
    440	if (unlikely(!page))
    441		return -ENOMEM;
    442
    443	if (!PageUptodate(page)) {
    444		/* The page is not loaded from the flash */
    445		if (!(pos & ~PAGE_MASK) && len == PAGE_SIZE) {
    446			/*
    447			 * We change whole page so no need to load it. But we
    448			 * do not know whether this page exists on the media or
    449			 * not, so we assume the latter because it requires
    450			 * larger budget. The assumption is that it is better
    451			 * to budget a bit more than to read the page from the
    452			 * media. Thus, we are setting the @PG_checked flag
    453			 * here.
    454			 */
    455			SetPageChecked(page);
    456			skipped_read = 1;
    457		} else {
    458			err = do_readpage(page);
    459			if (err) {
    460				unlock_page(page);
    461				put_page(page);
    462				return err;
    463			}
    464		}
    465
    466		SetPageUptodate(page);
    467		ClearPageError(page);
    468	}
    469
    470	err = allocate_budget(c, page, ui, appending);
    471	if (unlikely(err)) {
    472		ubifs_assert(c, err == -ENOSPC);
    473		/*
    474		 * If we skipped reading the page because we were going to
    475		 * write all of it, then it is not up to date.
    476		 */
    477		if (skipped_read) {
    478			ClearPageChecked(page);
    479			ClearPageUptodate(page);
    480		}
    481		/*
    482		 * Budgeting failed which means it would have to force
    483		 * write-back but didn't, because we set the @fast flag in the
    484		 * request. Write-back cannot be done now, while we have the
    485		 * page locked, because it would deadlock. Unlock and free
    486		 * everything and fall-back to slow-path.
    487		 */
    488		if (appending) {
    489			ubifs_assert(c, mutex_is_locked(&ui->ui_mutex));
    490			mutex_unlock(&ui->ui_mutex);
    491		}
    492		unlock_page(page);
    493		put_page(page);
    494
    495		return write_begin_slow(mapping, pos, len, pagep);
    496	}
    497
    498	/*
    499	 * Whee, we acquired budgeting quickly - without involving
    500	 * garbage-collection, committing or forcing write-back. We return
    501	 * with @ui->ui_mutex locked if we are appending pages, and unlocked
    502	 * otherwise. This is an optimization (slightly hacky though).
    503	 */
    504	*pagep = page;
    505	return 0;
    506
    507}
    508
    509/**
    510 * cancel_budget - cancel budget.
    511 * @c: UBIFS file-system description object
    512 * @page: page to cancel budget for
    513 * @ui: UBIFS inode object the page belongs to
    514 * @appending: non-zero if the page is appended
    515 *
    516 * This is a helper function for a page write operation. It unlocks the
    517 * @ui->ui_mutex in case of appending.
    518 */
    519static void cancel_budget(struct ubifs_info *c, struct page *page,
    520			  struct ubifs_inode *ui, int appending)
    521{
    522	if (appending) {
    523		if (!ui->dirty)
    524			ubifs_release_dirty_inode_budget(c, ui);
    525		mutex_unlock(&ui->ui_mutex);
    526	}
    527	if (!PagePrivate(page)) {
    528		if (PageChecked(page))
    529			release_new_page_budget(c);
    530		else
    531			release_existing_page_budget(c);
    532	}
    533}
    534
    535static int ubifs_write_end(struct file *file, struct address_space *mapping,
    536			   loff_t pos, unsigned len, unsigned copied,
    537			   struct page *page, void *fsdata)
    538{
    539	struct inode *inode = mapping->host;
    540	struct ubifs_inode *ui = ubifs_inode(inode);
    541	struct ubifs_info *c = inode->i_sb->s_fs_info;
    542	loff_t end_pos = pos + len;
    543	int appending = !!(end_pos > inode->i_size);
    544
    545	dbg_gen("ino %lu, pos %llu, pg %lu, len %u, copied %d, i_size %lld",
    546		inode->i_ino, pos, page->index, len, copied, inode->i_size);
    547
    548	if (unlikely(copied < len && len == PAGE_SIZE)) {
    549		/*
    550		 * VFS copied less data to the page than it intended and
    551		 * declared in its '->write_begin()' call via the @len
    552		 * argument. If the page was not up-to-date, and @len was
    553		 * @PAGE_SIZE, the 'ubifs_write_begin()' function did
    554		 * not load it from the media (for optimization reasons). This
    555		 * means that part of the page contains garbage. So read the
    556		 * page now.
    557		 */
    558		dbg_gen("copied %d instead of %d, read page and repeat",
    559			copied, len);
    560		cancel_budget(c, page, ui, appending);
    561		ClearPageChecked(page);
    562
    563		/*
    564		 * Return 0 to force VFS to repeat the whole operation, or the
    565		 * error code if 'do_readpage()' fails.
    566		 */
    567		copied = do_readpage(page);
    568		goto out;
    569	}
    570
    571	if (!PagePrivate(page)) {
    572		attach_page_private(page, (void *)1);
    573		atomic_long_inc(&c->dirty_pg_cnt);
    574		__set_page_dirty_nobuffers(page);
    575	}
    576
    577	if (appending) {
    578		i_size_write(inode, end_pos);
    579		ui->ui_size = end_pos;
    580		/*
    581		 * Note, we do not set @I_DIRTY_PAGES (which means that the
    582		 * inode has dirty pages), this has been done in
    583		 * '__set_page_dirty_nobuffers()'.
    584		 */
    585		__mark_inode_dirty(inode, I_DIRTY_DATASYNC);
    586		ubifs_assert(c, mutex_is_locked(&ui->ui_mutex));
    587		mutex_unlock(&ui->ui_mutex);
    588	}
    589
    590out:
    591	unlock_page(page);
    592	put_page(page);
    593	return copied;
    594}
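/*
 * Editor's note: minimal userspace model of the "return 0 so the VFS repeats
 * the write" contract used in ubifs_write_end() above when fewer bytes were
 * copied than announced and the page was never read.  The retry loop stands in
 * for the generic VFS write loop; everything here is invented for illustration.
 */
#include <stdio.h>

/* Pretend the first attempt copies only half the data (e.g. a fault while
 * copying from user space) and the second attempt copies everything. */
static unsigned int copy_attempt(unsigned int len, int attempt)
{
	return attempt == 0 ? len / 2 : len;
}

static unsigned int write_end_model(unsigned int len, unsigned int copied,
				    int page_uptodate)
{
	if (copied < len && !page_uptodate)
		return 0;	/* page has garbage in it: ask the caller to retry */
	return copied;
}

int main(void)
{
	unsigned int len = 4096, done = 0;

	for (int attempt = 0; done == 0; attempt++) {
		unsigned int copied = copy_attempt(len, attempt);

		done = write_end_model(len, copied, 0);
		printf("attempt %d: copied %u, write_end returned %u\n",
		       attempt, copied, done);
	}
	return 0;
}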
    595
    596/**
    597 * populate_page - copy data nodes into a page for bulk-read.
    598 * @c: UBIFS file-system description object
    599 * @page: page
    600 * @bu: bulk-read information
    601 * @n: next zbranch slot
    602 *
    603 * This function returns %0 on success and a negative error code on failure.
    604 */
    605static int populate_page(struct ubifs_info *c, struct page *page,
    606			 struct bu_info *bu, int *n)
    607{
    608	int i = 0, nn = *n, offs = bu->zbranch[0].offs, hole = 0, read = 0;
    609	struct inode *inode = page->mapping->host;
    610	loff_t i_size = i_size_read(inode);
    611	unsigned int page_block;
    612	void *addr, *zaddr;
    613	pgoff_t end_index;
    614
    615	dbg_gen("ino %lu, pg %lu, i_size %lld, flags %#lx",
    616		inode->i_ino, page->index, i_size, page->flags);
    617
    618	addr = zaddr = kmap(page);
    619
    620	end_index = (i_size - 1) >> PAGE_SHIFT;
    621	if (!i_size || page->index > end_index) {
    622		hole = 1;
    623		memset(addr, 0, PAGE_SIZE);
    624		goto out_hole;
    625	}
    626
    627	page_block = page->index << UBIFS_BLOCKS_PER_PAGE_SHIFT;
    628	while (1) {
    629		int err, len, out_len, dlen;
    630
    631		if (nn >= bu->cnt) {
    632			hole = 1;
    633			memset(addr, 0, UBIFS_BLOCK_SIZE);
    634		} else if (key_block(c, &bu->zbranch[nn].key) == page_block) {
    635			struct ubifs_data_node *dn;
    636
    637			dn = bu->buf + (bu->zbranch[nn].offs - offs);
    638
    639			ubifs_assert(c, le64_to_cpu(dn->ch.sqnum) >
    640				     ubifs_inode(inode)->creat_sqnum);
    641
    642			len = le32_to_cpu(dn->size);
    643			if (len <= 0 || len > UBIFS_BLOCK_SIZE)
    644				goto out_err;
    645
    646			dlen = le32_to_cpu(dn->ch.len) - UBIFS_DATA_NODE_SZ;
    647			out_len = UBIFS_BLOCK_SIZE;
    648
    649			if (IS_ENCRYPTED(inode)) {
    650				err = ubifs_decrypt(inode, dn, &dlen, page_block);
    651				if (err)
    652					goto out_err;
    653			}
    654
    655			err = ubifs_decompress(c, &dn->data, dlen, addr, &out_len,
    656					       le16_to_cpu(dn->compr_type));
    657			if (err || len != out_len)
    658				goto out_err;
    659
    660			if (len < UBIFS_BLOCK_SIZE)
    661				memset(addr + len, 0, UBIFS_BLOCK_SIZE - len);
    662
    663			nn += 1;
    664			read = (i << UBIFS_BLOCK_SHIFT) + len;
    665		} else if (key_block(c, &bu->zbranch[nn].key) < page_block) {
    666			nn += 1;
    667			continue;
    668		} else {
    669			hole = 1;
    670			memset(addr, 0, UBIFS_BLOCK_SIZE);
    671		}
    672		if (++i >= UBIFS_BLOCKS_PER_PAGE)
    673			break;
    674		addr += UBIFS_BLOCK_SIZE;
    675		page_block += 1;
    676	}
    677
    678	if (end_index == page->index) {
    679		int len = i_size & (PAGE_SIZE - 1);
    680
    681		if (len && len < read)
    682			memset(zaddr + len, 0, read - len);
    683	}
    684
    685out_hole:
    686	if (hole) {
    687		SetPageChecked(page);
    688		dbg_gen("hole");
    689	}
    690
    691	SetPageUptodate(page);
    692	ClearPageError(page);
    693	flush_dcache_page(page);
    694	kunmap(page);
    695	*n = nn;
    696	return 0;
    697
    698out_err:
    699	ClearPageUptodate(page);
    700	SetPageError(page);
    701	flush_dcache_page(page);
    702	kunmap(page);
    703	ubifs_err(c, "bad data node (block %u, inode %lu)",
    704		  page_block, inode->i_ino);
    705	return -EINVAL;
    706}
    707
    708/**
    709 * ubifs_do_bulk_read - do bulk-read.
    710 * @c: UBIFS file-system description object
    711 * @bu: bulk-read information
    712 * @page1: first page to read
    713 *
    714 * This function returns %1 if the bulk-read is done, otherwise %0 is returned.
    715 */
    716static int ubifs_do_bulk_read(struct ubifs_info *c, struct bu_info *bu,
    717			      struct page *page1)
    718{
    719	pgoff_t offset = page1->index, end_index;
    720	struct address_space *mapping = page1->mapping;
    721	struct inode *inode = mapping->host;
    722	struct ubifs_inode *ui = ubifs_inode(inode);
    723	int err, page_idx, page_cnt, ret = 0, n = 0;
    724	int allocate = bu->buf ? 0 : 1;
    725	loff_t isize;
    726	gfp_t ra_gfp_mask = readahead_gfp_mask(mapping) & ~__GFP_FS;
    727
    728	err = ubifs_tnc_get_bu_keys(c, bu);
    729	if (err)
    730		goto out_warn;
    731
    732	if (bu->eof) {
    733		/* Turn off bulk-read at the end of the file */
    734		ui->read_in_a_row = 1;
    735		ui->bulk_read = 0;
    736	}
    737
    738	page_cnt = bu->blk_cnt >> UBIFS_BLOCKS_PER_PAGE_SHIFT;
    739	if (!page_cnt) {
    740		/*
    741		 * This happens when there are multiple blocks per page and the
    742		 * blocks for the first page we are looking for are not
    743		 * together. If all the pages were like this, bulk-read would
    744		 * reduce performance, so we turn it off for a while.
    745		 */
    746		goto out_bu_off;
    747	}
    748
    749	if (bu->cnt) {
    750		if (allocate) {
    751			/*
    752			 * Allocate bulk-read buffer depending on how many data
    753			 * nodes we are going to read.
    754			 */
    755			bu->buf_len = bu->zbranch[bu->cnt - 1].offs +
    756				      bu->zbranch[bu->cnt - 1].len -
    757				      bu->zbranch[0].offs;
    758			ubifs_assert(c, bu->buf_len > 0);
    759			ubifs_assert(c, bu->buf_len <= c->leb_size);
    760			bu->buf = kmalloc(bu->buf_len, GFP_NOFS | __GFP_NOWARN);
    761			if (!bu->buf)
    762				goto out_bu_off;
    763		}
    764
    765		err = ubifs_tnc_bulk_read(c, bu);
    766		if (err)
    767			goto out_warn;
    768	}
    769
    770	err = populate_page(c, page1, bu, &n);
    771	if (err)
    772		goto out_warn;
    773
    774	unlock_page(page1);
    775	ret = 1;
    776
    777	isize = i_size_read(inode);
    778	if (isize == 0)
    779		goto out_free;
    780	end_index = ((isize - 1) >> PAGE_SHIFT);
    781
    782	for (page_idx = 1; page_idx < page_cnt; page_idx++) {
    783		pgoff_t page_offset = offset + page_idx;
    784		struct page *page;
    785
    786		if (page_offset > end_index)
    787			break;
    788		page = pagecache_get_page(mapping, page_offset,
    789				 FGP_LOCK|FGP_ACCESSED|FGP_CREAT|FGP_NOWAIT,
    790				 ra_gfp_mask);
    791		if (!page)
    792			break;
    793		if (!PageUptodate(page))
    794			err = populate_page(c, page, bu, &n);
    795		unlock_page(page);
    796		put_page(page);
    797		if (err)
    798			break;
    799	}
    800
    801	ui->last_page_read = offset + page_idx - 1;
    802
    803out_free:
    804	if (allocate)
    805		kfree(bu->buf);
    806	return ret;
    807
    808out_warn:
    809	ubifs_warn(c, "ignoring error %d and skipping bulk-read", err);
    810	goto out_free;
    811
    812out_bu_off:
    813	ui->read_in_a_row = ui->bulk_read = 0;
    814	goto out_free;
    815}
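/*
 * Editor's note: tiny userspace illustration of the bulk-read buffer sizing in
 * ubifs_do_bulk_read() above - the buffer has to span from the start of the
 * first data node to the end of the last one in the LEB.  The offsets below
 * are made-up sample values.
 */
#include <stdio.h>

struct zbranch_model { int offs; int len; };

int main(void)
{
	struct zbranch_model zbranch[] = {
		{ .offs = 1024, .len = 160 },
		{ .offs = 1184, .len = 160 },
		{ .offs = 1344, .len = 200 },
	};
	int cnt = 3;
	int buf_len = zbranch[cnt - 1].offs + zbranch[cnt - 1].len - zbranch[0].offs;

	printf("read %d bytes starting at LEB offset %d\n", buf_len, zbranch[0].offs);
	return 0;
}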
    816
    817/**
    818 * ubifs_bulk_read - determine whether to bulk-read and, if so, do it.
    819 * @page: page from which to start bulk-read.
    820 *
    821 * Some flash media are capable of reading sequentially at faster rates. UBIFS
    822 * bulk-read facility is designed to take advantage of that, by reading in one
    823 * go consecutive data nodes that are also located consecutively in the same
    824 * LEB. This function returns %1 if a bulk-read is done and %0 otherwise.
    825 */
    826static int ubifs_bulk_read(struct page *page)
    827{
    828	struct inode *inode = page->mapping->host;
    829	struct ubifs_info *c = inode->i_sb->s_fs_info;
    830	struct ubifs_inode *ui = ubifs_inode(inode);
    831	pgoff_t index = page->index, last_page_read = ui->last_page_read;
    832	struct bu_info *bu;
    833	int err = 0, allocated = 0;
    834
    835	ui->last_page_read = index;
    836	if (!c->bulk_read)
    837		return 0;
    838
    839	/*
    840	 * Bulk-read is protected by @ui->ui_mutex, but it is an optimization,
    841	 * so don't bother if we cannot lock the mutex.
    842	 */
    843	if (!mutex_trylock(&ui->ui_mutex))
    844		return 0;
    845
    846	if (index != last_page_read + 1) {
    847		/* Turn off bulk-read if we stop reading sequentially */
    848		ui->read_in_a_row = 1;
    849		if (ui->bulk_read)
    850			ui->bulk_read = 0;
    851		goto out_unlock;
    852	}
    853
    854	if (!ui->bulk_read) {
    855		ui->read_in_a_row += 1;
    856		if (ui->read_in_a_row < 3)
    857			goto out_unlock;
    858		/* Three reads in a row, so switch on bulk-read */
    859		ui->bulk_read = 1;
    860	}
    861
    862	/*
    863	 * If possible, try to use pre-allocated bulk-read information, which
    864	 * is protected by @c->bu_mutex.
    865	 */
    866	if (mutex_trylock(&c->bu_mutex))
    867		bu = &c->bu;
    868	else {
    869		bu = kmalloc(sizeof(struct bu_info), GFP_NOFS | __GFP_NOWARN);
    870		if (!bu)
    871			goto out_unlock;
    872
    873		bu->buf = NULL;
    874		allocated = 1;
    875	}
    876
    877	bu->buf_len = c->max_bu_buf_len;
    878	data_key_init(c, &bu->key, inode->i_ino,
    879		      page->index << UBIFS_BLOCKS_PER_PAGE_SHIFT);
    880	err = ubifs_do_bulk_read(c, bu, page);
    881
    882	if (!allocated)
    883		mutex_unlock(&c->bu_mutex);
    884	else
    885		kfree(bu);
    886
    887out_unlock:
    888	mutex_unlock(&ui->ui_mutex);
    889	return err;
    890}
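/*
 * Editor's note: userspace model of the bulk-read heuristic implemented in
 * ubifs_bulk_read() above - three sequential reads in a row switch bulk-read
 * on, a non-sequential read switches it off again.  The structure and helper
 * are invented for this example.
 */
#include <stdbool.h>
#include <stdio.h>

struct bulk_state {
	unsigned long last_page_read;
	unsigned int read_in_a_row;
	bool bulk_read;
};

static bool want_bulk_read(struct bulk_state *s, unsigned long index)
{
	unsigned long last = s->last_page_read;

	s->last_page_read = index;
	if (index != last + 1) {
		/* Sequence broken: fall back to normal reads. */
		s->read_in_a_row = 1;
		s->bulk_read = false;
		return false;
	}
	if (!s->bulk_read && ++s->read_in_a_row >= 3)
		s->bulk_read = true;
	return s->bulk_read;
}

int main(void)
{
	struct bulk_state s = { .last_page_read = (unsigned long)-1 };
	unsigned long pages[] = { 0, 1, 2, 3, 10, 11 };

	for (unsigned int i = 0; i < sizeof(pages) / sizeof(pages[0]); i++)
		printf("read page %lu -> bulk-read %s\n", pages[i],
		       want_bulk_read(&s, pages[i]) ? "on" : "off");
	return 0;
}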
    891
    892static int ubifs_read_folio(struct file *file, struct folio *folio)
    893{
    894	struct page *page = &folio->page;
    895
    896	if (ubifs_bulk_read(page))
    897		return 0;
    898	do_readpage(page);
    899	folio_unlock(folio);
    900	return 0;
    901}
    902
    903static int do_writepage(struct page *page, int len)
    904{
    905	int err = 0, i, blen;
    906	unsigned int block;
    907	void *addr;
    908	union ubifs_key key;
    909	struct inode *inode = page->mapping->host;
    910	struct ubifs_info *c = inode->i_sb->s_fs_info;
    911
    912#ifdef UBIFS_DEBUG
    913	struct ubifs_inode *ui = ubifs_inode(inode);
    914	spin_lock(&ui->ui_lock);
    915	ubifs_assert(c, page->index <= ui->synced_i_size >> PAGE_SHIFT);
    916	spin_unlock(&ui->ui_lock);
    917#endif
    918
    919	/* Update radix tree tags */
    920	set_page_writeback(page);
    921
    922	addr = kmap(page);
    923	block = page->index << UBIFS_BLOCKS_PER_PAGE_SHIFT;
    924	i = 0;
    925	while (len) {
    926		blen = min_t(int, len, UBIFS_BLOCK_SIZE);
    927		data_key_init(c, &key, inode->i_ino, block);
    928		err = ubifs_jnl_write_data(c, inode, &key, addr, blen);
    929		if (err)
    930			break;
    931		if (++i >= UBIFS_BLOCKS_PER_PAGE)
    932			break;
    933		block += 1;
    934		addr += blen;
    935		len -= blen;
    936	}
    937	if (err) {
    938		SetPageError(page);
    939		ubifs_err(c, "cannot write page %lu of inode %lu, error %d",
    940			  page->index, inode->i_ino, err);
    941		ubifs_ro_mode(c, err);
    942	}
    943
    944	ubifs_assert(c, PagePrivate(page));
    945	if (PageChecked(page))
    946		release_new_page_budget(c);
    947	else
    948		release_existing_page_budget(c);
    949
    950	atomic_long_dec(&c->dirty_pg_cnt);
    951	detach_page_private(page);
    952	ClearPageChecked(page);
    953
    954	kunmap(page);
    955	unlock_page(page);
    956	end_page_writeback(page);
    957	return err;
    958}
    959
    960/*
    961 * When writing back dirty inodes, VFS first writes back pages belonging to the
    962 * inode, then the inode itself. For UBIFS this may cause a problem. Consider a
    963 * situation when we have an inode with size 0, then a megabyte of data is
    964 * appended to the inode, then write-back starts and flushes some amount of the
    965 * dirty pages, the journal becomes full, commit happens and finishes, and then
    966 * an unclean reboot happens. When the file system is mounted next time, the
    967 * inode size would still be 0, but there would be many pages which are beyond
    968 * the inode size; they would be indexed and consume flash space. Because the
    969 * journal has been committed, the replay would not be able to detect this
    970 * situation and correct the inode size. This means UBIFS would have to scan
    971 * the whole index and correct all inode sizes, which is long and unacceptable.
    972 *
    973 * To prevent situations like this, UBIFS writes pages back only if they are
    974 * within the last synchronized inode size, i.e. the size which has been
    975 * written to the flash media last time. Otherwise, UBIFS forces inode
    976 * write-back, thus making sure the on-flash inode contains current inode size,
    977 * and then keeps writing pages back.
    978 *
    979 * Some locking issues explanation. 'ubifs_writepage()' first is called with
    980 * the page locked, and it locks @ui_mutex. However, write-back does take inode
    981 * @i_mutex, which means other VFS operations may be run on this inode at the
    982 * same time. And the problematic one is truncation to smaller size, from where
    983 * we have to call 'truncate_setsize()', which first changes @inode->i_size,
    984 * then drops the truncated pages. And while dropping the pages, it takes the
    985 * page lock. This means that 'do_truncation()' cannot call 'truncate_setsize()'
    986 * with @ui_mutex locked, because it would deadlock with 'ubifs_writepage()'.
    987 * This means that @inode->i_size is changed while @ui_mutex is unlocked.
    988 *
    989 * XXX(truncate): with the new truncate sequence this is not true anymore,
    990 * and the calls to truncate_setsize can be moved around freely.  They should
    991 * be moved to the very end of the truncate sequence.
    992 *
    993 * But in 'ubifs_writepage()' we have to guarantee that we do not write beyond
    994 * the inode size. How do we do this if @inode->i_size may become smaller while we
    995 * are in the middle of 'ubifs_writepage()'? The UBIFS solution is the
    996 * @ui->ui_isize "shadow" field which UBIFS uses instead of @inode->i_size
    997 * internally and updates it under @ui_mutex.
    998 *
    999 * Q: why do we not worry that if we race with truncation, we may end up with a
   1000 * situation when the inode is truncated while we are in the middle of
   1001 * 'do_writepage()', so we do write beyond inode size?
   1002 * A: If we are in the middle of 'do_writepage()', truncation would block
   1003 * on the page lock and it would not write the truncated inode node to the
   1004 * journal before we have finished.
   1005 */
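/*
 * Editor's note: userspace sketch of the "only write back pages within the
 * last synchronized inode size" rule explained above.  The synced size and
 * page geometry are example values, not taken from a real file system.
 */
#include <stdio.h>

#define EX_PAGE_SHIFT 12

static void writepage_model(unsigned long page_index, long long synced_i_size)
{
	if (page_index >= (unsigned long)(synced_i_size >> EX_PAGE_SHIFT)) {
		/* The page lies beyond the on-flash size: write the inode
		 * first so an unclean reboot cannot leave data nodes past the
		 * stored inode size. */
		printf("page %lu: write the inode first, then the data\n",
		       page_index);
	} else {
		printf("page %lu: within the synced size, write the data only\n",
		       page_index);
	}
}

int main(void)
{
	writepage_model(1, 1 << 20);	/* well inside the synced size */
	writepage_model(300, 1 << 20);	/* beyond it: the inode goes first */
	return 0;
}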
   1006static int ubifs_writepage(struct page *page, struct writeback_control *wbc)
   1007{
   1008	struct inode *inode = page->mapping->host;
   1009	struct ubifs_info *c = inode->i_sb->s_fs_info;
   1010	struct ubifs_inode *ui = ubifs_inode(inode);
   1011	loff_t i_size =  i_size_read(inode), synced_i_size;
   1012	pgoff_t end_index = i_size >> PAGE_SHIFT;
   1013	int err, len = i_size & (PAGE_SIZE - 1);
   1014	void *kaddr;
   1015
   1016	dbg_gen("ino %lu, pg %lu, pg flags %#lx",
   1017		inode->i_ino, page->index, page->flags);
   1018	ubifs_assert(c, PagePrivate(page));
   1019
   1020	/* Is the page fully outside @i_size? (truncate in progress) */
   1021	if (page->index > end_index || (page->index == end_index && !len)) {
   1022		err = 0;
   1023		goto out_unlock;
   1024	}
   1025
   1026	spin_lock(&ui->ui_lock);
   1027	synced_i_size = ui->synced_i_size;
   1028	spin_unlock(&ui->ui_lock);
   1029
   1030	/* Is the page fully inside @i_size? */
   1031	if (page->index < end_index) {
   1032		if (page->index >= synced_i_size >> PAGE_SHIFT) {
   1033			err = inode->i_sb->s_op->write_inode(inode, NULL);
   1034			if (err)
   1035				goto out_unlock;
   1036			/*
   1037			 * The inode has been written, but the write-buffer has
   1038			 * not been synchronized, so in case of an unclean
   1039			 * reboot we may end up with some pages beyond inode
   1040			 * size, but they would be in the journal (because
   1041			 * commit flushes write buffers) and recovery would deal
   1042			 * with this.
   1043			 */
   1044		}
   1045		return do_writepage(page, PAGE_SIZE);
   1046	}
   1047
   1048	/*
   1049	 * The page straddles @i_size. It must be zeroed out on each and every
   1050	 * writepage invocation because it may be mmapped. "A file is mapped
   1051	 * in multiples of the page size. For a file that is not a multiple of
   1052	 * the page size, the remaining memory is zeroed when mapped, and
   1053	 * writes to that region are not written out to the file."
   1054	 */
   1055	kaddr = kmap_atomic(page);
   1056	memset(kaddr + len, 0, PAGE_SIZE - len);
   1057	flush_dcache_page(page);
   1058	kunmap_atomic(kaddr);
   1059
   1060	if (i_size > synced_i_size) {
   1061		err = inode->i_sb->s_op->write_inode(inode, NULL);
   1062		if (err)
   1063			goto out_unlock;
   1064	}
   1065
   1066	return do_writepage(page, len);
   1067
   1068out_unlock:
   1069	unlock_page(page);
   1070	return err;
   1071}
   1072
   1073/**
   1074 * do_attr_changes - change inode attributes.
   1075 * @inode: inode to change attributes for
   1076 * @attr: describes attributes to change
   1077 */
   1078static void do_attr_changes(struct inode *inode, const struct iattr *attr)
   1079{
   1080	if (attr->ia_valid & ATTR_UID)
   1081		inode->i_uid = attr->ia_uid;
   1082	if (attr->ia_valid & ATTR_GID)
   1083		inode->i_gid = attr->ia_gid;
   1084	if (attr->ia_valid & ATTR_ATIME)
   1085		inode->i_atime = attr->ia_atime;
   1086	if (attr->ia_valid & ATTR_MTIME)
   1087		inode->i_mtime = attr->ia_mtime;
   1088	if (attr->ia_valid & ATTR_CTIME)
   1089		inode->i_ctime = attr->ia_ctime;
   1090	if (attr->ia_valid & ATTR_MODE) {
   1091		umode_t mode = attr->ia_mode;
   1092
   1093		if (!in_group_p(inode->i_gid) && !capable(CAP_FSETID))
   1094			mode &= ~S_ISGID;
   1095		inode->i_mode = mode;
   1096	}
   1097}
   1098
   1099/**
   1100 * do_truncation - truncate an inode.
   1101 * @c: UBIFS file-system description object
   1102 * @inode: inode to truncate
   1103 * @attr: inode attribute changes description
   1104 *
   1105 * This function implements VFS '->setattr()' call when the inode is truncated
   1106 * to a smaller size. Returns zero in case of success and a negative error code
   1107 * in case of failure.
   1108 */
   1109static int do_truncation(struct ubifs_info *c, struct inode *inode,
   1110			 const struct iattr *attr)
   1111{
   1112	int err;
   1113	struct ubifs_budget_req req;
   1114	loff_t old_size = inode->i_size, new_size = attr->ia_size;
   1115	int offset = new_size & (UBIFS_BLOCK_SIZE - 1), budgeted = 1;
   1116	struct ubifs_inode *ui = ubifs_inode(inode);
   1117
   1118	dbg_gen("ino %lu, size %lld -> %lld", inode->i_ino, old_size, new_size);
   1119	memset(&req, 0, sizeof(struct ubifs_budget_req));
   1120
   1121	/*
   1122	 * If this is truncation to a smaller size, and we do not truncate on a
   1123	 * block boundary, budget for changing one data block, because the last
   1124	 * block will be re-written.
   1125	 */
   1126	if (new_size & (UBIFS_BLOCK_SIZE - 1))
   1127		req.dirtied_page = 1;
   1128
   1129	req.dirtied_ino = 1;
   1130	/* A funny way to budget for truncation node */
   1131	req.dirtied_ino_d = UBIFS_TRUN_NODE_SZ;
   1132	err = ubifs_budget_space(c, &req);
   1133	if (err) {
   1134		/*
   1135		 * Treat truncations to zero as deletion and always allow them,
   1136		 * just like we do for '->unlink()'.
   1137		 */
   1138		if (new_size || err != -ENOSPC)
   1139			return err;
   1140		budgeted = 0;
   1141	}
   1142
   1143	truncate_setsize(inode, new_size);
   1144
   1145	if (offset) {
   1146		pgoff_t index = new_size >> PAGE_SHIFT;
   1147		struct page *page;
   1148
   1149		page = find_lock_page(inode->i_mapping, index);
   1150		if (page) {
   1151			if (PageDirty(page)) {
   1152				/*
   1153				 * 'ubifs_jnl_truncate()' will try to truncate
   1154				 * the last data node, but it contains
   1155				 * out-of-date data because the page is dirty.
   1156				 * Write the page now, so that
   1157				 * 'ubifs_jnl_truncate()' will see an already
   1158				 * truncated (and up to date) data node.
   1159				 */
   1160				ubifs_assert(c, PagePrivate(page));
   1161
   1162				clear_page_dirty_for_io(page);
   1163				if (UBIFS_BLOCKS_PER_PAGE_SHIFT)
   1164					offset = new_size &
   1165						 (PAGE_SIZE - 1);
   1166				err = do_writepage(page, offset);
   1167				put_page(page);
   1168				if (err)
   1169					goto out_budg;
   1170				/*
   1171				 * We could now tell 'ubifs_jnl_truncate()' not
   1172				 * to read the last block.
   1173				 */
   1174			} else {
   1175				/*
   1176				 * We could 'kmap()' the page and pass the data
   1177				 * to 'ubifs_jnl_truncate()' to save it from
   1178				 * having to read it.
   1179				 */
   1180				unlock_page(page);
   1181				put_page(page);
   1182			}
   1183		}
   1184	}
   1185
   1186	mutex_lock(&ui->ui_mutex);
   1187	ui->ui_size = inode->i_size;
   1188	/* Truncation changes inode [mc]time */
   1189	inode->i_mtime = inode->i_ctime = current_time(inode);
   1190	/* Other attributes may be changed at the same time as well */
   1191	do_attr_changes(inode, attr);
   1192	err = ubifs_jnl_truncate(c, inode, old_size, new_size);
   1193	mutex_unlock(&ui->ui_mutex);
   1194
   1195out_budg:
   1196	if (budgeted)
   1197		ubifs_release_budget(c, &req);
   1198	else {
   1199		c->bi.nospace = c->bi.nospace_rp = 0;
   1200		smp_wmb();
   1201	}
   1202	return err;
   1203}
   1204
   1205/**
   1206 * do_setattr - change inode attributes.
   1207 * @c: UBIFS file-system description object
   1208 * @inode: inode to change attributes for
   1209 * @attr: inode attribute changes description
   1210 *
   1211 * This function implements VFS '->setattr()' call for all cases except
   1212 * truncations to smaller size. Returns zero in case of success and a negative
   1213 * error code in case of failure.
   1214 */
   1215static int do_setattr(struct ubifs_info *c, struct inode *inode,
   1216		      const struct iattr *attr)
   1217{
   1218	int err, release;
   1219	loff_t new_size = attr->ia_size;
   1220	struct ubifs_inode *ui = ubifs_inode(inode);
   1221	struct ubifs_budget_req req = { .dirtied_ino = 1,
   1222				.dirtied_ino_d = ALIGN(ui->data_len, 8) };
   1223
   1224	err = ubifs_budget_space(c, &req);
   1225	if (err)
   1226		return err;
   1227
   1228	if (attr->ia_valid & ATTR_SIZE) {
   1229		dbg_gen("size %lld -> %lld", inode->i_size, new_size);
   1230		truncate_setsize(inode, new_size);
   1231	}
   1232
   1233	mutex_lock(&ui->ui_mutex);
   1234	if (attr->ia_valid & ATTR_SIZE) {
   1235		/* Truncation changes inode [mc]time */
   1236		inode->i_mtime = inode->i_ctime = current_time(inode);
   1237		/* 'truncate_setsize()' changed @i_size, update @ui_size */
   1238		ui->ui_size = inode->i_size;
   1239	}
   1240
   1241	do_attr_changes(inode, attr);
   1242
   1243	release = ui->dirty;
   1244	if (attr->ia_valid & ATTR_SIZE)
   1245		/*
   1246		 * Inode length changed, so we have to make sure
   1247		 * @I_DIRTY_DATASYNC is set.
   1248		 */
   1249		 __mark_inode_dirty(inode, I_DIRTY_DATASYNC);
   1250	else
   1251		mark_inode_dirty_sync(inode);
   1252	mutex_unlock(&ui->ui_mutex);
   1253
   1254	if (release)
   1255		ubifs_release_budget(c, &req);
   1256	if (IS_SYNC(inode))
   1257		err = inode->i_sb->s_op->write_inode(inode, NULL);
   1258	return err;
   1259}
   1260
   1261int ubifs_setattr(struct user_namespace *mnt_userns, struct dentry *dentry,
   1262		  struct iattr *attr)
   1263{
   1264	int err;
   1265	struct inode *inode = d_inode(dentry);
   1266	struct ubifs_info *c = inode->i_sb->s_fs_info;
   1267
   1268	dbg_gen("ino %lu, mode %#x, ia_valid %#x",
   1269		inode->i_ino, inode->i_mode, attr->ia_valid);
   1270	err = setattr_prepare(&init_user_ns, dentry, attr);
   1271	if (err)
   1272		return err;
   1273
   1274	err = dbg_check_synced_i_size(c, inode);
   1275	if (err)
   1276		return err;
   1277
   1278	err = fscrypt_prepare_setattr(dentry, attr);
   1279	if (err)
   1280		return err;
   1281
   1282	if ((attr->ia_valid & ATTR_SIZE) && attr->ia_size < inode->i_size)
   1283		/* Truncation to a smaller size */
   1284		err = do_truncation(c, inode, attr);
   1285	else
   1286		err = do_setattr(c, inode, attr);
   1287
   1288	return err;
   1289}
   1290
   1291static void ubifs_invalidate_folio(struct folio *folio, size_t offset,
   1292				 size_t length)
   1293{
   1294	struct inode *inode = folio->mapping->host;
   1295	struct ubifs_info *c = inode->i_sb->s_fs_info;
   1296
   1297	ubifs_assert(c, folio_test_private(folio));
   1298	if (offset || length < folio_size(folio))
   1299		/* Partial folio remains dirty */
   1300		return;
   1301
   1302	if (folio_test_checked(folio))
   1303		release_new_page_budget(c);
   1304	else
   1305		release_existing_page_budget(c);
   1306
   1307	atomic_long_dec(&c->dirty_pg_cnt);
   1308	folio_detach_private(folio);
   1309	folio_clear_checked(folio);
   1310}
   1311
   1312int ubifs_fsync(struct file *file, loff_t start, loff_t end, int datasync)
   1313{
   1314	struct inode *inode = file->f_mapping->host;
   1315	struct ubifs_info *c = inode->i_sb->s_fs_info;
   1316	int err;
   1317
   1318	dbg_gen("syncing inode %lu", inode->i_ino);
   1319
   1320	if (c->ro_mount)
   1321		/*
   1322		 * For some really strange reasons VFS does not filter out
   1323		 * 'fsync()' for R/O mounted file-systems as per 2.6.39.
   1324		 */
   1325		return 0;
   1326
   1327	err = file_write_and_wait_range(file, start, end);
   1328	if (err)
   1329		return err;
   1330	inode_lock(inode);
   1331
   1332	/* Synchronize the inode unless this is a 'datasync()' call. */
   1333	if (!datasync || (inode->i_state & I_DIRTY_DATASYNC)) {
   1334		err = inode->i_sb->s_op->write_inode(inode, NULL);
   1335		if (err)
   1336			goto out;
   1337	}
   1338
   1339	/*
   1340	 * Nodes related to this inode may still sit in a write-buffer. Flush
   1341	 * them.
   1342	 */
   1343	err = ubifs_sync_wbufs_by_inode(c, inode);
   1344out:
   1345	inode_unlock(inode);
   1346	return err;
   1347}
   1348
   1349/**
   1350 * mctime_update_needed - check if mtime or ctime update is needed.
   1351 * @inode: the inode to do the check for
   1352 * @now: current time
   1353 *
   1354 * This helper function checks if the inode mtime/ctime should be updated or
   1355 * not. If current values of the time-stamps are within the UBIFS inode time
   1356 * granularity, they are not updated. This is an optimization.
   1357 */
   1358static inline int mctime_update_needed(const struct inode *inode,
   1359				       const struct timespec64 *now)
   1360{
   1361	if (!timespec64_equal(&inode->i_mtime, now) ||
   1362	    !timespec64_equal(&inode->i_ctime, now))
   1363		return 1;
   1364	return 0;
   1365}
   1366
   1367/**
   1368 * ubifs_update_time - update time of inode.
   1369 * @inode: inode to update
   1370 *
   1371 * This function updates time of the inode.
   1372 */
   1373int ubifs_update_time(struct inode *inode, struct timespec64 *time,
   1374			     int flags)
   1375{
   1376	struct ubifs_inode *ui = ubifs_inode(inode);
   1377	struct ubifs_info *c = inode->i_sb->s_fs_info;
   1378	struct ubifs_budget_req req = { .dirtied_ino = 1,
   1379			.dirtied_ino_d = ALIGN(ui->data_len, 8) };
   1380	int err, release;
   1381
   1382	if (!IS_ENABLED(CONFIG_UBIFS_ATIME_SUPPORT))
   1383		return generic_update_time(inode, time, flags);
   1384
   1385	err = ubifs_budget_space(c, &req);
   1386	if (err)
   1387		return err;
   1388
   1389	mutex_lock(&ui->ui_mutex);
   1390	if (flags & S_ATIME)
   1391		inode->i_atime = *time;
   1392	if (flags & S_CTIME)
   1393		inode->i_ctime = *time;
   1394	if (flags & S_MTIME)
   1395		inode->i_mtime = *time;
   1396
   1397	release = ui->dirty;
   1398	__mark_inode_dirty(inode, I_DIRTY_SYNC);
   1399	mutex_unlock(&ui->ui_mutex);
   1400	if (release)
   1401		ubifs_release_budget(c, &req);
   1402	return 0;
   1403}
   1404
   1405/**
   1406 * update_mctime - update mtime and ctime of an inode.
   1407 * @inode: inode to update
   1408 *
   1409 * This function updates mtime and ctime of the inode if they are not equal to
   1410 * the current time. Returns zero in case of success and a negative error code in
   1411 * case of failure.
   1412 */
   1413static int update_mctime(struct inode *inode)
   1414{
   1415	struct timespec64 now = current_time(inode);
   1416	struct ubifs_inode *ui = ubifs_inode(inode);
   1417	struct ubifs_info *c = inode->i_sb->s_fs_info;
   1418
   1419	if (mctime_update_needed(inode, &now)) {
   1420		int err, release;
   1421		struct ubifs_budget_req req = { .dirtied_ino = 1,
   1422				.dirtied_ino_d = ALIGN(ui->data_len, 8) };
   1423
   1424		err = ubifs_budget_space(c, &req);
   1425		if (err)
   1426			return err;
   1427
   1428		mutex_lock(&ui->ui_mutex);
   1429		inode->i_mtime = inode->i_ctime = current_time(inode);
   1430		release = ui->dirty;
   1431		mark_inode_dirty_sync(inode);
   1432		mutex_unlock(&ui->ui_mutex);
   1433		if (release)
   1434			ubifs_release_budget(c, &req);
   1435	}
   1436
   1437	return 0;
   1438}
   1439
   1440static ssize_t ubifs_write_iter(struct kiocb *iocb, struct iov_iter *from)
   1441{
   1442	int err = update_mctime(file_inode(iocb->ki_filp));
   1443	if (err)
   1444		return err;
   1445
   1446	return generic_file_write_iter(iocb, from);
   1447}
   1448
   1449static bool ubifs_dirty_folio(struct address_space *mapping,
   1450		struct folio *folio)
   1451{
   1452	bool ret;
   1453	struct ubifs_info *c = mapping->host->i_sb->s_fs_info;
   1454
   1455	ret = filemap_dirty_folio(mapping, folio);
   1456	/*
   1457	 * An attempt to dirty a page without budgeting for it - should not
   1458	 * happen.
   1459	 */
   1460	ubifs_assert(c, ret == false);
   1461	return ret;
   1462}
   1463
   1464#ifdef CONFIG_MIGRATION
   1465static int ubifs_migrate_page(struct address_space *mapping,
   1466		struct page *newpage, struct page *page, enum migrate_mode mode)
   1467{
   1468	int rc;
   1469
   1470	rc = migrate_page_move_mapping(mapping, newpage, page, 0);
   1471	if (rc != MIGRATEPAGE_SUCCESS)
   1472		return rc;
   1473
   1474	if (PagePrivate(page)) {
   1475		detach_page_private(page);
   1476		attach_page_private(newpage, (void *)1);
   1477	}
   1478
   1479	if (mode != MIGRATE_SYNC_NO_COPY)
   1480		migrate_page_copy(newpage, page);
   1481	else
   1482		migrate_page_states(newpage, page);
   1483	return MIGRATEPAGE_SUCCESS;
   1484}
   1485#endif
   1486
   1487static bool ubifs_release_folio(struct folio *folio, gfp_t unused_gfp_flags)
   1488{
   1489	struct inode *inode = folio->mapping->host;
   1490	struct ubifs_info *c = inode->i_sb->s_fs_info;
   1491
   1492	/*
   1493	 * An attempt to release a dirty page without budgeting for it - should
   1494	 * not happen.
   1495	 */
   1496	if (folio_test_writeback(folio))
   1497		return false;
   1498	ubifs_assert(c, folio_test_private(folio));
   1499	ubifs_assert(c, 0);
   1500	folio_detach_private(folio);
   1501	folio_clear_checked(folio);
   1502	return true;
   1503}
   1504
   1505/*
   1506 * mmap()d file has taken a write protection fault and is being made writable.
   1507 * UBIFS must ensure the page is budgeted for.
   1508 */
   1509static vm_fault_t ubifs_vm_page_mkwrite(struct vm_fault *vmf)
   1510{
   1511	struct page *page = vmf->page;
   1512	struct inode *inode = file_inode(vmf->vma->vm_file);
   1513	struct ubifs_info *c = inode->i_sb->s_fs_info;
   1514	struct timespec64 now = current_time(inode);
   1515	struct ubifs_budget_req req = { .new_page = 1 };
   1516	int err, update_time;
   1517
   1518	dbg_gen("ino %lu, pg %lu, i_size %lld",	inode->i_ino, page->index,
   1519		i_size_read(inode));
   1520	ubifs_assert(c, !c->ro_media && !c->ro_mount);
   1521
   1522	if (unlikely(c->ro_error))
   1523		return VM_FAULT_SIGBUS; /* -EROFS */
   1524
   1525	/*
   1526	 * We have not locked @page so far so we may budget for changing the
   1527	 * page. Note, we cannot do this after we locked the page, because
   1528	 * budgeting may cause write-back which would cause deadlock.
   1529	 *
   1530	 * At the moment we do not know whether the page is dirty or not, so we
   1531	 * assume that it is not and budget for a new page. We could look at
   1532	 * the @PG_private flag and figure this out, but we may race with write
   1533	 * back and the page state may change by the time we lock it, so this
   1534	 * would need additional care. We do not bother with this at the
   1535	 * moment, although it might be a good idea to do so. Instead, we allocate
   1536	 * budget for a new page and amend it later on if the page was in fact
   1537	 * dirty.
   1538	 *
   1539	 * The budgeting-related logic of this function is similar to what we
   1540	 * do in 'ubifs_write_begin()' and 'ubifs_write_end()'. Glance there
   1541	 * for more comments.
   1542	 */
   1543	update_time = mctime_update_needed(inode, &now);
   1544	if (update_time)
   1545		/*
   1546		 * We have to change inode time stamp which requires extra
   1547		 * budgeting.
   1548		 */
   1549		req.dirtied_ino = 1;
   1550
   1551	err = ubifs_budget_space(c, &req);
   1552	if (unlikely(err)) {
   1553		if (err == -ENOSPC)
   1554			ubifs_warn(c, "out of space for mmapped file (inode number %lu)",
   1555				   inode->i_ino);
   1556		return VM_FAULT_SIGBUS;
   1557	}
   1558
   1559	lock_page(page);
   1560	if (unlikely(page->mapping != inode->i_mapping ||
   1561		     page_offset(page) > i_size_read(inode))) {
   1562		/* Page got truncated out from underneath us */
   1563		goto sigbus;
   1564	}
   1565
   1566	if (PagePrivate(page))
   1567		release_new_page_budget(c);
   1568	else {
   1569		if (!PageChecked(page))
   1570			ubifs_convert_page_budget(c);
   1571		attach_page_private(page, (void *)1);
   1572		atomic_long_inc(&c->dirty_pg_cnt);
   1573		__set_page_dirty_nobuffers(page);
   1574	}
   1575
   1576	if (update_time) {
   1577		int release;
   1578		struct ubifs_inode *ui = ubifs_inode(inode);
   1579
   1580		mutex_lock(&ui->ui_mutex);
   1581		inode->i_mtime = inode->i_ctime = current_time(inode);
   1582		release = ui->dirty;
   1583		mark_inode_dirty_sync(inode);
   1584		mutex_unlock(&ui->ui_mutex);
   1585		if (release)
   1586			ubifs_release_dirty_inode_budget(c, ui);
   1587	}
   1588
   1589	wait_for_stable_page(page);
   1590	return VM_FAULT_LOCKED;
   1591
   1592sigbus:
   1593	unlock_page(page);
   1594	ubifs_release_budget(c, &req);
   1595	return VM_FAULT_SIGBUS;
   1596}
   1597
   1598static const struct vm_operations_struct ubifs_file_vm_ops = {
   1599	.fault        = filemap_fault,
   1600	.map_pages = filemap_map_pages,
   1601	.page_mkwrite = ubifs_vm_page_mkwrite,
   1602};
   1603
   1604static int ubifs_file_mmap(struct file *file, struct vm_area_struct *vma)
   1605{
   1606	int err;
   1607
   1608	err = generic_file_mmap(file, vma);
   1609	if (err)
   1610		return err;
   1611	vma->vm_ops = &ubifs_file_vm_ops;
   1612
   1613	if (IS_ENABLED(CONFIG_UBIFS_ATIME_SUPPORT))
   1614		file_accessed(file);
   1615
   1616	return 0;
   1617}
   1618
   1619static const char *ubifs_get_link(struct dentry *dentry,
   1620					    struct inode *inode,
   1621					    struct delayed_call *done)
   1622{
   1623	struct ubifs_inode *ui = ubifs_inode(inode);
   1624
   1625	if (!IS_ENCRYPTED(inode))
   1626		return ui->data;
   1627
   1628	if (!dentry)
   1629		return ERR_PTR(-ECHILD);
   1630
   1631	return fscrypt_get_symlink(inode, ui->data, ui->data_len, done);
   1632}
   1633
   1634static int ubifs_symlink_getattr(struct user_namespace *mnt_userns,
   1635				 const struct path *path, struct kstat *stat,
   1636				 u32 request_mask, unsigned int query_flags)
   1637{
   1638	ubifs_getattr(mnt_userns, path, stat, request_mask, query_flags);
   1639
   1640	if (IS_ENCRYPTED(d_inode(path->dentry)))
   1641		return fscrypt_symlink_getattr(path, stat);
   1642	return 0;
   1643}
   1644
   1645const struct address_space_operations ubifs_file_address_operations = {
   1646	.read_folio     = ubifs_read_folio,
   1647	.writepage      = ubifs_writepage,
   1648	.write_begin    = ubifs_write_begin,
   1649	.write_end      = ubifs_write_end,
   1650	.invalidate_folio = ubifs_invalidate_folio,
   1651	.dirty_folio	= ubifs_dirty_folio,
   1652#ifdef CONFIG_MIGRATION
   1653	.migratepage	= ubifs_migrate_page,
   1654#endif
   1655	.release_folio    = ubifs_release_folio,
   1656};
   1657
   1658const struct inode_operations ubifs_file_inode_operations = {
   1659	.setattr     = ubifs_setattr,
   1660	.getattr     = ubifs_getattr,
   1661	.listxattr   = ubifs_listxattr,
   1662	.update_time = ubifs_update_time,
   1663	.fileattr_get = ubifs_fileattr_get,
   1664	.fileattr_set = ubifs_fileattr_set,
   1665};
   1666
   1667const struct inode_operations ubifs_symlink_inode_operations = {
   1668	.get_link    = ubifs_get_link,
   1669	.setattr     = ubifs_setattr,
   1670	.getattr     = ubifs_symlink_getattr,
   1671	.listxattr   = ubifs_listxattr,
   1672	.update_time = ubifs_update_time,
   1673};
   1674
   1675const struct file_operations ubifs_file_operations = {
   1676	.llseek         = generic_file_llseek,
   1677	.read_iter      = generic_file_read_iter,
   1678	.write_iter     = ubifs_write_iter,
   1679	.mmap           = ubifs_file_mmap,
   1680	.fsync          = ubifs_fsync,
   1681	.unlocked_ioctl = ubifs_ioctl,
   1682	.splice_read	= generic_file_splice_read,
   1683	.splice_write	= iter_file_splice_write,
   1684	.open		= fscrypt_file_open,
   1685#ifdef CONFIG_COMPAT
   1686	.compat_ioctl   = ubifs_compat_ioctl,
   1687#endif
   1688};