cachepc-linux

Fork of AMDESE/linux with modifications for CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux
Log | Files | Refs | README | LICENSE | sfeed.txt

replay.c (34283B)


      1// SPDX-License-Identifier: GPL-2.0-only
      2/*
      3 * This file is part of UBIFS.
      4 *
      5 * Copyright (C) 2006-2008 Nokia Corporation.
      6 *
      7 * Authors: Adrian Hunter
      8 *          Artem Bityutskiy (Битюцкий Артём)
      9 */
     10
     11/*
     12 * This file contains journal replay code. It runs when the file-system is being
     13 * mounted and requires no locking.
     14 *
     15 * The larger is the journal, the longer it takes to scan it, so the longer it
     16 * takes to mount UBIFS. This is why the journal has limited size which may be
     17 * changed depending on the system requirements. But a larger journal gives
     18 * faster I/O speed because it writes the index less frequently. So this is a
     19 * trade-off. Also, the journal is indexed by the in-memory index (TNC), so the
     20 * larger is the journal, the more memory its index may consume.
     21 */
     22
     23#include "ubifs.h"
     24#include <linux/list_sort.h>
     25#include <crypto/hash.h>
     26#include <crypto/algapi.h>
     27
     28/**
     29 * struct replay_entry - replay list entry.
     30 * @lnum: logical eraseblock number of the node
     31 * @offs: node offset
     32 * @len: node length
     33 * @deletion: non-zero if this entry corresponds to a node deletion
     34 * @sqnum: node sequence number
     35 * @list: links the replay list
     36 * @key: node key
     37 * @nm: directory entry name
     38 * @old_size: truncation old size
     39 * @new_size: truncation new size
     40 *
     41 * The replay process first scans all buds and builds the replay list, then
     42 * sorts the replay list in nodes sequence number order, and then inserts all
     43 * the replay entries to the TNC.
     44 */
     45struct replay_entry {
     46	int lnum;
     47	int offs;
     48	int len;
     49	u8 hash[UBIFS_HASH_ARR_SZ];
     50	unsigned int deletion:1;
     51	unsigned long long sqnum;
     52	struct list_head list;
     53	union ubifs_key key;
     54	union {
     55		struct fscrypt_name nm;
     56		struct {
     57			loff_t old_size;
     58			loff_t new_size;
     59		};
     60	};
     61};
     62
     63/**
     64 * struct bud_entry - entry in the list of buds to replay.
     65 * @list: next bud in the list
     66 * @bud: bud description object
     67 * @sqnum: reference node sequence number
     68 * @free: free bytes in the bud
     69 * @dirty: dirty bytes in the bud
     70 */
     71struct bud_entry {
     72	struct list_head list;
     73	struct ubifs_bud *bud;
     74	unsigned long long sqnum;
     75	int free;
     76	int dirty;
     77};
     78
     79/**
     80 * set_bud_lprops - set free and dirty space used by a bud.
     81 * @c: UBIFS file-system description object
     82 * @b: bud entry which describes the bud
     83 *
     84 * This function makes sure the LEB properties of bud @b are set correctly
     85 * after the replay. Returns zero in case of success and a negative error code
     86 * in case of failure.
     87 */
     88static int set_bud_lprops(struct ubifs_info *c, struct bud_entry *b)
     89{
     90	const struct ubifs_lprops *lp;
     91	int err = 0, dirty;
     92
     93	ubifs_get_lprops(c);
     94
     95	lp = ubifs_lpt_lookup_dirty(c, b->bud->lnum);
     96	if (IS_ERR(lp)) {
     97		err = PTR_ERR(lp);
     98		goto out;
     99	}
    100
    101	dirty = lp->dirty;
    102	if (b->bud->start == 0 && (lp->free != c->leb_size || lp->dirty != 0)) {
    103		/*
    104		 * The LEB was added to the journal with a starting offset of
    105		 * zero which means the LEB must have been empty. The LEB
    106		 * property values should be @lp->free == @c->leb_size and
    107		 * @lp->dirty == 0, but that is not the case. The reason is that
    108		 * the LEB had been garbage collected before it became the bud,
    109		 * and there was no commit in between. The garbage collector
    110		 * resets the free and dirty space without recording it
    111		 * anywhere except lprops, so if there was no commit then
    112		 * lprops does not have that information.
    113		 *
    114		 * We do not need to adjust free space because the scan has told
    115		 * us the exact value which is recorded in the replay entry as
    116		 * @b->free.
    117		 *
    118		 * However we do need to subtract from the dirty space the
    119		 * amount of space that the garbage collector reclaimed, which
    120		 * is the whole LEB minus the amount of space that was free.
    121		 */
    122		dbg_mnt("bud LEB %d was GC'd (%d free, %d dirty)", b->bud->lnum,
    123			lp->free, lp->dirty);
    124		dbg_gc("bud LEB %d was GC'd (%d free, %d dirty)", b->bud->lnum,
    125			lp->free, lp->dirty);
    126		dirty -= c->leb_size - lp->free;
    127		/*
    128		 * If the replay order was perfect the dirty space would now be
    129		 * zero. The order is not perfect because the journal heads
    130		 * race with each other. This is not a problem but is does mean
    131		 * that the dirty space may temporarily exceed c->leb_size
    132		 * during the replay.
    133		 */
    134		if (dirty != 0)
    135			dbg_mnt("LEB %d lp: %d free %d dirty replay: %d free %d dirty",
    136				b->bud->lnum, lp->free, lp->dirty, b->free,
    137				b->dirty);
    138	}
    139	lp = ubifs_change_lp(c, lp, b->free, dirty + b->dirty,
    140			     lp->flags | LPROPS_TAKEN, 0);
    141	if (IS_ERR(lp)) {
    142		err = PTR_ERR(lp);
    143		goto out;
    144	}
    145
    146	/* Make sure the journal head points to the latest bud */
    147	err = ubifs_wbuf_seek_nolock(&c->jheads[b->bud->jhead].wbuf,
    148				     b->bud->lnum, c->leb_size - b->free);
    149
    150out:
    151	ubifs_release_lprops(c);
    152	return err;
    153}
    154
    155/**
    156 * set_buds_lprops - set free and dirty space for all replayed buds.
    157 * @c: UBIFS file-system description object
    158 *
    159 * This function sets LEB properties for all replayed buds. Returns zero in
    160 * case of success and a negative error code in case of failure.
    161 */
    162static int set_buds_lprops(struct ubifs_info *c)
    163{
    164	struct bud_entry *b;
    165	int err;
    166
    167	list_for_each_entry(b, &c->replay_buds, list) {
    168		err = set_bud_lprops(c, b);
    169		if (err)
    170			return err;
    171	}
    172
    173	return 0;
    174}
    175
    176/**
    177 * trun_remove_range - apply a replay entry for a truncation to the TNC.
    178 * @c: UBIFS file-system description object
    179 * @r: replay entry of truncation
    180 */
    181static int trun_remove_range(struct ubifs_info *c, struct replay_entry *r)
    182{
    183	unsigned min_blk, max_blk;
    184	union ubifs_key min_key, max_key;
    185	ino_t ino;
    186
    187	min_blk = r->new_size / UBIFS_BLOCK_SIZE;
    188	if (r->new_size & (UBIFS_BLOCK_SIZE - 1))
    189		min_blk += 1;
    190
    191	max_blk = r->old_size / UBIFS_BLOCK_SIZE;
    192	if ((r->old_size & (UBIFS_BLOCK_SIZE - 1)) == 0)
    193		max_blk -= 1;
    194
    195	ino = key_inum(c, &r->key);
    196
    197	data_key_init(c, &min_key, ino, min_blk);
    198	data_key_init(c, &max_key, ino, max_blk);
    199
    200	return ubifs_tnc_remove_range(c, &min_key, &max_key);
    201}
    202
    203/**
    204 * inode_still_linked - check whether inode in question will be re-linked.
    205 * @c: UBIFS file-system description object
    206 * @rino: replay entry to test
    207 *
    208 * O_TMPFILE files can be re-linked, this means link count goes from 0 to 1.
    209 * This case needs special care, otherwise all references to the inode will
    210 * be removed upon the first replay entry of an inode with link count 0
    211 * is found.
    212 */
    213static bool inode_still_linked(struct ubifs_info *c, struct replay_entry *rino)
    214{
    215	struct replay_entry *r;
    216
    217	ubifs_assert(c, rino->deletion);
    218	ubifs_assert(c, key_type(c, &rino->key) == UBIFS_INO_KEY);
    219
    220	/*
    221	 * Find the most recent entry for the inode behind @rino and check
    222	 * whether it is a deletion.
    223	 */
    224	list_for_each_entry_reverse(r, &c->replay_list, list) {
    225		ubifs_assert(c, r->sqnum >= rino->sqnum);
    226		if (key_inum(c, &r->key) == key_inum(c, &rino->key) &&
    227		    key_type(c, &r->key) == UBIFS_INO_KEY)
    228			return r->deletion == 0;
    229
    230	}
    231
    232	ubifs_assert(c, 0);
    233	return false;
    234}
    235
    236/**
    237 * apply_replay_entry - apply a replay entry to the TNC.
    238 * @c: UBIFS file-system description object
    239 * @r: replay entry to apply
    240 *
    241 * Apply a replay entry to the TNC.
    242 */
    243static int apply_replay_entry(struct ubifs_info *c, struct replay_entry *r)
    244{
    245	int err;
    246
    247	dbg_mntk(&r->key, "LEB %d:%d len %d deletion %d sqnum %llu key ",
    248		 r->lnum, r->offs, r->len, r->deletion, r->sqnum);
    249
    250	if (is_hash_key(c, &r->key)) {
    251		if (r->deletion)
    252			err = ubifs_tnc_remove_nm(c, &r->key, &r->nm);
    253		else
    254			err = ubifs_tnc_add_nm(c, &r->key, r->lnum, r->offs,
    255					       r->len, r->hash, &r->nm);
    256	} else {
    257		if (r->deletion)
    258			switch (key_type(c, &r->key)) {
    259			case UBIFS_INO_KEY:
    260			{
    261				ino_t inum = key_inum(c, &r->key);
    262
    263				if (inode_still_linked(c, r)) {
    264					err = 0;
    265					break;
    266				}
    267
    268				err = ubifs_tnc_remove_ino(c, inum);
    269				break;
    270			}
    271			case UBIFS_TRUN_KEY:
    272				err = trun_remove_range(c, r);
    273				break;
    274			default:
    275				err = ubifs_tnc_remove(c, &r->key);
    276				break;
    277			}
    278		else
    279			err = ubifs_tnc_add(c, &r->key, r->lnum, r->offs,
    280					    r->len, r->hash);
    281		if (err)
    282			return err;
    283
    284		if (c->need_recovery)
    285			err = ubifs_recover_size_accum(c, &r->key, r->deletion,
    286						       r->new_size);
    287	}
    288
    289	return err;
    290}
    291
    292/**
    293 * replay_entries_cmp - compare 2 replay entries.
    294 * @priv: UBIFS file-system description object
    295 * @a: first replay entry
    296 * @b: second replay entry
    297 *
    298 * This is a comparios function for 'list_sort()' which compares 2 replay
    299 * entries @a and @b by comparing their sequence number.  Returns %1 if @a has
    300 * greater sequence number and %-1 otherwise.
    301 */
    302static int replay_entries_cmp(void *priv, const struct list_head *a,
    303			      const struct list_head *b)
    304{
    305	struct ubifs_info *c = priv;
    306	struct replay_entry *ra, *rb;
    307
    308	cond_resched();
    309	if (a == b)
    310		return 0;
    311
    312	ra = list_entry(a, struct replay_entry, list);
    313	rb = list_entry(b, struct replay_entry, list);
    314	ubifs_assert(c, ra->sqnum != rb->sqnum);
    315	if (ra->sqnum > rb->sqnum)
    316		return 1;
    317	return -1;
    318}
    319
    320/**
    321 * apply_replay_list - apply the replay list to the TNC.
    322 * @c: UBIFS file-system description object
    323 *
    324 * Apply all entries in the replay list to the TNC. Returns zero in case of
    325 * success and a negative error code in case of failure.
    326 */
    327static int apply_replay_list(struct ubifs_info *c)
    328{
    329	struct replay_entry *r;
    330	int err;
    331
    332	list_sort(c, &c->replay_list, &replay_entries_cmp);
    333
    334	list_for_each_entry(r, &c->replay_list, list) {
    335		cond_resched();
    336
    337		err = apply_replay_entry(c, r);
    338		if (err)
    339			return err;
    340	}
    341
    342	return 0;
    343}
    344
    345/**
    346 * destroy_replay_list - destroy the replay.
    347 * @c: UBIFS file-system description object
    348 *
    349 * Destroy the replay list.
    350 */
    351static void destroy_replay_list(struct ubifs_info *c)
    352{
    353	struct replay_entry *r, *tmp;
    354
    355	list_for_each_entry_safe(r, tmp, &c->replay_list, list) {
    356		if (is_hash_key(c, &r->key))
    357			kfree(fname_name(&r->nm));
    358		list_del(&r->list);
    359		kfree(r);
    360	}
    361}
    362
    363/**
    364 * insert_node - insert a node to the replay list
    365 * @c: UBIFS file-system description object
    366 * @lnum: node logical eraseblock number
    367 * @offs: node offset
    368 * @len: node length
    369 * @key: node key
    370 * @sqnum: sequence number
    371 * @deletion: non-zero if this is a deletion
    372 * @used: number of bytes in use in a LEB
    373 * @old_size: truncation old size
    374 * @new_size: truncation new size
    375 *
    376 * This function inserts a scanned non-direntry node to the replay list. The
    377 * replay list contains @struct replay_entry elements, and we sort this list in
    378 * sequence number order before applying it. The replay list is applied at the
    379 * very end of the replay process. Since the list is sorted in sequence number
    380 * order, the older modifications are applied first. This function returns zero
    381 * in case of success and a negative error code in case of failure.
    382 */
    383static int insert_node(struct ubifs_info *c, int lnum, int offs, int len,
    384		       const u8 *hash, union ubifs_key *key,
    385		       unsigned long long sqnum, int deletion, int *used,
    386		       loff_t old_size, loff_t new_size)
    387{
    388	struct replay_entry *r;
    389
    390	dbg_mntk(key, "add LEB %d:%d, key ", lnum, offs);
    391
    392	if (key_inum(c, key) >= c->highest_inum)
    393		c->highest_inum = key_inum(c, key);
    394
    395	r = kzalloc(sizeof(struct replay_entry), GFP_KERNEL);
    396	if (!r)
    397		return -ENOMEM;
    398
    399	if (!deletion)
    400		*used += ALIGN(len, 8);
    401	r->lnum = lnum;
    402	r->offs = offs;
    403	r->len = len;
    404	ubifs_copy_hash(c, hash, r->hash);
    405	r->deletion = !!deletion;
    406	r->sqnum = sqnum;
    407	key_copy(c, key, &r->key);
    408	r->old_size = old_size;
    409	r->new_size = new_size;
    410
    411	list_add_tail(&r->list, &c->replay_list);
    412	return 0;
    413}
    414
    415/**
    416 * insert_dent - insert a directory entry node into the replay list.
    417 * @c: UBIFS file-system description object
    418 * @lnum: node logical eraseblock number
    419 * @offs: node offset
    420 * @len: node length
    421 * @key: node key
    422 * @name: directory entry name
    423 * @nlen: directory entry name length
    424 * @sqnum: sequence number
    425 * @deletion: non-zero if this is a deletion
    426 * @used: number of bytes in use in a LEB
    427 *
    428 * This function inserts a scanned directory entry node or an extended
    429 * attribute entry to the replay list. Returns zero in case of success and a
    430 * negative error code in case of failure.
    431 */
    432static int insert_dent(struct ubifs_info *c, int lnum, int offs, int len,
    433		       const u8 *hash, union ubifs_key *key,
    434		       const char *name, int nlen, unsigned long long sqnum,
    435		       int deletion, int *used)
    436{
    437	struct replay_entry *r;
    438	char *nbuf;
    439
    440	dbg_mntk(key, "add LEB %d:%d, key ", lnum, offs);
    441	if (key_inum(c, key) >= c->highest_inum)
    442		c->highest_inum = key_inum(c, key);
    443
    444	r = kzalloc(sizeof(struct replay_entry), GFP_KERNEL);
    445	if (!r)
    446		return -ENOMEM;
    447
    448	nbuf = kmalloc(nlen + 1, GFP_KERNEL);
    449	if (!nbuf) {
    450		kfree(r);
    451		return -ENOMEM;
    452	}
    453
    454	if (!deletion)
    455		*used += ALIGN(len, 8);
    456	r->lnum = lnum;
    457	r->offs = offs;
    458	r->len = len;
    459	ubifs_copy_hash(c, hash, r->hash);
    460	r->deletion = !!deletion;
    461	r->sqnum = sqnum;
    462	key_copy(c, key, &r->key);
    463	fname_len(&r->nm) = nlen;
    464	memcpy(nbuf, name, nlen);
    465	nbuf[nlen] = '\0';
    466	fname_name(&r->nm) = nbuf;
    467
    468	list_add_tail(&r->list, &c->replay_list);
    469	return 0;
    470}
    471
    472/**
    473 * ubifs_validate_entry - validate directory or extended attribute entry node.
    474 * @c: UBIFS file-system description object
    475 * @dent: the node to validate
    476 *
    477 * This function validates directory or extended attribute entry node @dent.
    478 * Returns zero if the node is all right and a %-EINVAL if not.
    479 */
    480int ubifs_validate_entry(struct ubifs_info *c,
    481			 const struct ubifs_dent_node *dent)
    482{
    483	int key_type = key_type_flash(c, dent->key);
    484	int nlen = le16_to_cpu(dent->nlen);
    485
    486	if (le32_to_cpu(dent->ch.len) != nlen + UBIFS_DENT_NODE_SZ + 1 ||
    487	    dent->type >= UBIFS_ITYPES_CNT ||
    488	    nlen > UBIFS_MAX_NLEN || dent->name[nlen] != 0 ||
    489	    (key_type == UBIFS_XENT_KEY && strnlen(dent->name, nlen) != nlen) ||
    490	    le64_to_cpu(dent->inum) > MAX_INUM) {
    491		ubifs_err(c, "bad %s node", key_type == UBIFS_DENT_KEY ?
    492			  "directory entry" : "extended attribute entry");
    493		return -EINVAL;
    494	}
    495
    496	if (key_type != UBIFS_DENT_KEY && key_type != UBIFS_XENT_KEY) {
    497		ubifs_err(c, "bad key type %d", key_type);
    498		return -EINVAL;
    499	}
    500
    501	return 0;
    502}
    503
    504/**
    505 * is_last_bud - check if the bud is the last in the journal head.
    506 * @c: UBIFS file-system description object
    507 * @bud: bud description object
    508 *
    509 * This function checks if bud @bud is the last bud in its journal head. This
    510 * information is then used by 'replay_bud()' to decide whether the bud can
    511 * have corruptions or not. Indeed, only last buds can be corrupted by power
    512 * cuts. Returns %1 if this is the last bud, and %0 if not.
    513 */
    514static int is_last_bud(struct ubifs_info *c, struct ubifs_bud *bud)
    515{
    516	struct ubifs_jhead *jh = &c->jheads[bud->jhead];
    517	struct ubifs_bud *next;
    518	uint32_t data;
    519	int err;
    520
    521	if (list_is_last(&bud->list, &jh->buds_list))
    522		return 1;
    523
    524	/*
    525	 * The following is a quirk to make sure we work correctly with UBIFS
    526	 * images used with older UBIFS.
    527	 *
    528	 * Normally, the last bud will be the last in the journal head's list
    529	 * of bud. However, there is one exception if the UBIFS image belongs
    530	 * to older UBIFS. This is fairly unlikely: one would need to use old
    531	 * UBIFS, then have a power cut exactly at the right point, and then
    532	 * try to mount this image with new UBIFS.
    533	 *
    534	 * The exception is: it is possible to have 2 buds A and B, A goes
    535	 * before B, and B is the last, bud B is contains no data, and bud A is
    536	 * corrupted at the end. The reason is that in older versions when the
    537	 * journal code switched the next bud (from A to B), it first added a
    538	 * log reference node for the new bud (B), and only after this it
    539	 * synchronized the write-buffer of current bud (A). But later this was
    540	 * changed and UBIFS started to always synchronize the write-buffer of
    541	 * the bud (A) before writing the log reference for the new bud (B).
    542	 *
    543	 * But because older UBIFS always synchronized A's write-buffer before
    544	 * writing to B, we can recognize this exceptional situation but
    545	 * checking the contents of bud B - if it is empty, then A can be
    546	 * treated as the last and we can recover it.
    547	 *
    548	 * TODO: remove this piece of code in a couple of years (today it is
    549	 * 16.05.2011).
    550	 */
    551	next = list_entry(bud->list.next, struct ubifs_bud, list);
    552	if (!list_is_last(&next->list, &jh->buds_list))
    553		return 0;
    554
    555	err = ubifs_leb_read(c, next->lnum, (char *)&data, next->start, 4, 1);
    556	if (err)
    557		return 0;
    558
    559	return data == 0xFFFFFFFF;
    560}
    561
    562/* authenticate_sleb_hash is split out for stack usage */
    563static int noinline_for_stack
    564authenticate_sleb_hash(struct ubifs_info *c,
    565		       struct shash_desc *log_hash, u8 *hash)
    566{
    567	SHASH_DESC_ON_STACK(hash_desc, c->hash_tfm);
    568
    569	hash_desc->tfm = c->hash_tfm;
    570
    571	ubifs_shash_copy_state(c, log_hash, hash_desc);
    572	return crypto_shash_final(hash_desc, hash);
    573}
    574
    575/**
    576 * authenticate_sleb - authenticate one scan LEB
    577 * @c: UBIFS file-system description object
    578 * @sleb: the scan LEB to authenticate
    579 * @log_hash:
    580 * @is_last: if true, this is the last LEB
    581 *
    582 * This function iterates over the buds of a single LEB authenticating all buds
    583 * with the authentication nodes on this LEB. Authentication nodes are written
    584 * after some buds and contain a HMAC covering the authentication node itself
    585 * and the buds between the last authentication node and the current
    586 * authentication node. It can happen that the last buds cannot be authenticated
    587 * because a powercut happened when some nodes were written but not the
    588 * corresponding authentication node. This function returns the number of nodes
    589 * that could be authenticated or a negative error code.
    590 */
    591static int authenticate_sleb(struct ubifs_info *c, struct ubifs_scan_leb *sleb,
    592			     struct shash_desc *log_hash, int is_last)
    593{
    594	int n_not_auth = 0;
    595	struct ubifs_scan_node *snod;
    596	int n_nodes = 0;
    597	int err;
    598	u8 hash[UBIFS_HASH_ARR_SZ];
    599	u8 hmac[UBIFS_HMAC_ARR_SZ];
    600
    601	if (!ubifs_authenticated(c))
    602		return sleb->nodes_cnt;
    603
    604	list_for_each_entry(snod, &sleb->nodes, list) {
    605
    606		n_nodes++;
    607
    608		if (snod->type == UBIFS_AUTH_NODE) {
    609			struct ubifs_auth_node *auth = snod->node;
    610
    611			err = authenticate_sleb_hash(c, log_hash, hash);
    612			if (err)
    613				goto out;
    614
    615			err = crypto_shash_tfm_digest(c->hmac_tfm, hash,
    616						      c->hash_len, hmac);
    617			if (err)
    618				goto out;
    619
    620			err = ubifs_check_hmac(c, auth->hmac, hmac);
    621			if (err) {
    622				err = -EPERM;
    623				goto out;
    624			}
    625			n_not_auth = 0;
    626		} else {
    627			err = crypto_shash_update(log_hash, snod->node,
    628						  snod->len);
    629			if (err)
    630				goto out;
    631			n_not_auth++;
    632		}
    633	}
    634
    635	/*
    636	 * A powercut can happen when some nodes were written, but not yet
    637	 * the corresponding authentication node. This may only happen on
    638	 * the last bud though.
    639	 */
    640	if (n_not_auth) {
    641		if (is_last) {
    642			dbg_mnt("%d unauthenticated nodes found on LEB %d, Ignoring them",
    643				n_not_auth, sleb->lnum);
    644			err = 0;
    645		} else {
    646			dbg_mnt("%d unauthenticated nodes found on non-last LEB %d",
    647				n_not_auth, sleb->lnum);
    648			err = -EPERM;
    649		}
    650	} else {
    651		err = 0;
    652	}
    653out:
    654	return err ? err : n_nodes - n_not_auth;
    655}
    656
    657/**
    658 * replay_bud - replay a bud logical eraseblock.
    659 * @c: UBIFS file-system description object
    660 * @b: bud entry which describes the bud
    661 *
    662 * This function replays bud @bud, recovers it if needed, and adds all nodes
    663 * from this bud to the replay list. Returns zero in case of success and a
    664 * negative error code in case of failure.
    665 */
    666static int replay_bud(struct ubifs_info *c, struct bud_entry *b)
    667{
    668	int is_last = is_last_bud(c, b->bud);
    669	int err = 0, used = 0, lnum = b->bud->lnum, offs = b->bud->start;
    670	int n_nodes, n = 0;
    671	struct ubifs_scan_leb *sleb;
    672	struct ubifs_scan_node *snod;
    673
    674	dbg_mnt("replay bud LEB %d, head %d, offs %d, is_last %d",
    675		lnum, b->bud->jhead, offs, is_last);
    676
    677	if (c->need_recovery && is_last)
    678		/*
    679		 * Recover only last LEBs in the journal heads, because power
    680		 * cuts may cause corruptions only in these LEBs, because only
    681		 * these LEBs could possibly be written to at the power cut
    682		 * time.
    683		 */
    684		sleb = ubifs_recover_leb(c, lnum, offs, c->sbuf, b->bud->jhead);
    685	else
    686		sleb = ubifs_scan(c, lnum, offs, c->sbuf, 0);
    687	if (IS_ERR(sleb))
    688		return PTR_ERR(sleb);
    689
    690	n_nodes = authenticate_sleb(c, sleb, b->bud->log_hash, is_last);
    691	if (n_nodes < 0) {
    692		err = n_nodes;
    693		goto out;
    694	}
    695
    696	ubifs_shash_copy_state(c, b->bud->log_hash,
    697			       c->jheads[b->bud->jhead].log_hash);
    698
    699	/*
    700	 * The bud does not have to start from offset zero - the beginning of
    701	 * the 'lnum' LEB may contain previously committed data. One of the
    702	 * things we have to do in replay is to correctly update lprops with
    703	 * newer information about this LEB.
    704	 *
    705	 * At this point lprops thinks that this LEB has 'c->leb_size - offs'
    706	 * bytes of free space because it only contain information about
    707	 * committed data.
    708	 *
    709	 * But we know that real amount of free space is 'c->leb_size -
    710	 * sleb->endpt', and the space in the 'lnum' LEB between 'offs' and
    711	 * 'sleb->endpt' is used by bud data. We have to correctly calculate
    712	 * how much of these data are dirty and update lprops with this
    713	 * information.
    714	 *
    715	 * The dirt in that LEB region is comprised of padding nodes, deletion
    716	 * nodes, truncation nodes and nodes which are obsoleted by subsequent
    717	 * nodes in this LEB. So instead of calculating clean space, we
    718	 * calculate used space ('used' variable).
    719	 */
    720
    721	list_for_each_entry(snod, &sleb->nodes, list) {
    722		u8 hash[UBIFS_HASH_ARR_SZ];
    723		int deletion = 0;
    724
    725		cond_resched();
    726
    727		if (snod->sqnum >= SQNUM_WATERMARK) {
    728			ubifs_err(c, "file system's life ended");
    729			goto out_dump;
    730		}
    731
    732		ubifs_node_calc_hash(c, snod->node, hash);
    733
    734		if (snod->sqnum > c->max_sqnum)
    735			c->max_sqnum = snod->sqnum;
    736
    737		switch (snod->type) {
    738		case UBIFS_INO_NODE:
    739		{
    740			struct ubifs_ino_node *ino = snod->node;
    741			loff_t new_size = le64_to_cpu(ino->size);
    742
    743			if (le32_to_cpu(ino->nlink) == 0)
    744				deletion = 1;
    745			err = insert_node(c, lnum, snod->offs, snod->len, hash,
    746					  &snod->key, snod->sqnum, deletion,
    747					  &used, 0, new_size);
    748			break;
    749		}
    750		case UBIFS_DATA_NODE:
    751		{
    752			struct ubifs_data_node *dn = snod->node;
    753			loff_t new_size = le32_to_cpu(dn->size) +
    754					  key_block(c, &snod->key) *
    755					  UBIFS_BLOCK_SIZE;
    756
    757			err = insert_node(c, lnum, snod->offs, snod->len, hash,
    758					  &snod->key, snod->sqnum, deletion,
    759					  &used, 0, new_size);
    760			break;
    761		}
    762		case UBIFS_DENT_NODE:
    763		case UBIFS_XENT_NODE:
    764		{
    765			struct ubifs_dent_node *dent = snod->node;
    766
    767			err = ubifs_validate_entry(c, dent);
    768			if (err)
    769				goto out_dump;
    770
    771			err = insert_dent(c, lnum, snod->offs, snod->len, hash,
    772					  &snod->key, dent->name,
    773					  le16_to_cpu(dent->nlen), snod->sqnum,
    774					  !le64_to_cpu(dent->inum), &used);
    775			break;
    776		}
    777		case UBIFS_TRUN_NODE:
    778		{
    779			struct ubifs_trun_node *trun = snod->node;
    780			loff_t old_size = le64_to_cpu(trun->old_size);
    781			loff_t new_size = le64_to_cpu(trun->new_size);
    782			union ubifs_key key;
    783
    784			/* Validate truncation node */
    785			if (old_size < 0 || old_size > c->max_inode_sz ||
    786			    new_size < 0 || new_size > c->max_inode_sz ||
    787			    old_size <= new_size) {
    788				ubifs_err(c, "bad truncation node");
    789				goto out_dump;
    790			}
    791
    792			/*
    793			 * Create a fake truncation key just to use the same
    794			 * functions which expect nodes to have keys.
    795			 */
    796			trun_key_init(c, &key, le32_to_cpu(trun->inum));
    797			err = insert_node(c, lnum, snod->offs, snod->len, hash,
    798					  &key, snod->sqnum, 1, &used,
    799					  old_size, new_size);
    800			break;
    801		}
    802		case UBIFS_AUTH_NODE:
    803			break;
    804		default:
    805			ubifs_err(c, "unexpected node type %d in bud LEB %d:%d",
    806				  snod->type, lnum, snod->offs);
    807			err = -EINVAL;
    808			goto out_dump;
    809		}
    810		if (err)
    811			goto out;
    812
    813		n++;
    814		if (n == n_nodes)
    815			break;
    816	}
    817
    818	ubifs_assert(c, ubifs_search_bud(c, lnum));
    819	ubifs_assert(c, sleb->endpt - offs >= used);
    820	ubifs_assert(c, sleb->endpt % c->min_io_size == 0);
    821
    822	b->dirty = sleb->endpt - offs - used;
    823	b->free = c->leb_size - sleb->endpt;
    824	dbg_mnt("bud LEB %d replied: dirty %d, free %d",
    825		lnum, b->dirty, b->free);
    826
    827out:
    828	ubifs_scan_destroy(sleb);
    829	return err;
    830
    831out_dump:
    832	ubifs_err(c, "bad node is at LEB %d:%d", lnum, snod->offs);
    833	ubifs_dump_node(c, snod->node, c->leb_size - snod->offs);
    834	ubifs_scan_destroy(sleb);
    835	return -EINVAL;
    836}
    837
    838/**
    839 * replay_buds - replay all buds.
    840 * @c: UBIFS file-system description object
    841 *
    842 * This function returns zero in case of success and a negative error code in
    843 * case of failure.
    844 */
    845static int replay_buds(struct ubifs_info *c)
    846{
    847	struct bud_entry *b;
    848	int err;
    849	unsigned long long prev_sqnum = 0;
    850
    851	list_for_each_entry(b, &c->replay_buds, list) {
    852		err = replay_bud(c, b);
    853		if (err)
    854			return err;
    855
    856		ubifs_assert(c, b->sqnum > prev_sqnum);
    857		prev_sqnum = b->sqnum;
    858	}
    859
    860	return 0;
    861}
    862
    863/**
    864 * destroy_bud_list - destroy the list of buds to replay.
    865 * @c: UBIFS file-system description object
    866 */
    867static void destroy_bud_list(struct ubifs_info *c)
    868{
    869	struct bud_entry *b;
    870
    871	while (!list_empty(&c->replay_buds)) {
    872		b = list_entry(c->replay_buds.next, struct bud_entry, list);
    873		list_del(&b->list);
    874		kfree(b);
    875	}
    876}
    877
    878/**
    879 * add_replay_bud - add a bud to the list of buds to replay.
    880 * @c: UBIFS file-system description object
    881 * @lnum: bud logical eraseblock number to replay
    882 * @offs: bud start offset
    883 * @jhead: journal head to which this bud belongs
    884 * @sqnum: reference node sequence number
    885 *
    886 * This function returns zero in case of success and a negative error code in
    887 * case of failure.
    888 */
    889static int add_replay_bud(struct ubifs_info *c, int lnum, int offs, int jhead,
    890			  unsigned long long sqnum)
    891{
    892	struct ubifs_bud *bud;
    893	struct bud_entry *b;
    894	int err;
    895
    896	dbg_mnt("add replay bud LEB %d:%d, head %d", lnum, offs, jhead);
    897
    898	bud = kmalloc(sizeof(struct ubifs_bud), GFP_KERNEL);
    899	if (!bud)
    900		return -ENOMEM;
    901
    902	b = kmalloc(sizeof(struct bud_entry), GFP_KERNEL);
    903	if (!b) {
    904		err = -ENOMEM;
    905		goto out;
    906	}
    907
    908	bud->lnum = lnum;
    909	bud->start = offs;
    910	bud->jhead = jhead;
    911	bud->log_hash = ubifs_hash_get_desc(c);
    912	if (IS_ERR(bud->log_hash)) {
    913		err = PTR_ERR(bud->log_hash);
    914		goto out;
    915	}
    916
    917	ubifs_shash_copy_state(c, c->log_hash, bud->log_hash);
    918
    919	ubifs_add_bud(c, bud);
    920
    921	b->bud = bud;
    922	b->sqnum = sqnum;
    923	list_add_tail(&b->list, &c->replay_buds);
    924
    925	return 0;
    926out:
    927	kfree(bud);
    928	kfree(b);
    929
    930	return err;
    931}
    932
    933/**
    934 * validate_ref - validate a reference node.
    935 * @c: UBIFS file-system description object
    936 * @ref: the reference node to validate
    937 *
    938 * This function returns %1 if a bud reference already exists for the LEB. %0 is
    939 * returned if the reference node is new, otherwise %-EINVAL is returned if
    940 * validation failed.
    941 */
    942static int validate_ref(struct ubifs_info *c, const struct ubifs_ref_node *ref)
    943{
    944	struct ubifs_bud *bud;
    945	int lnum = le32_to_cpu(ref->lnum);
    946	unsigned int offs = le32_to_cpu(ref->offs);
    947	unsigned int jhead = le32_to_cpu(ref->jhead);
    948
    949	/*
    950	 * ref->offs may point to the end of LEB when the journal head points
    951	 * to the end of LEB and we write reference node for it during commit.
    952	 * So this is why we require 'offs > c->leb_size'.
    953	 */
    954	if (jhead >= c->jhead_cnt || lnum >= c->leb_cnt ||
    955	    lnum < c->main_first || offs > c->leb_size ||
    956	    offs & (c->min_io_size - 1))
    957		return -EINVAL;
    958
    959	/* Make sure we have not already looked at this bud */
    960	bud = ubifs_search_bud(c, lnum);
    961	if (bud) {
    962		if (bud->jhead == jhead && bud->start <= offs)
    963			return 1;
    964		ubifs_err(c, "bud at LEB %d:%d was already referred", lnum, offs);
    965		return -EINVAL;
    966	}
    967
    968	return 0;
    969}
    970
    971/**
    972 * replay_log_leb - replay a log logical eraseblock.
    973 * @c: UBIFS file-system description object
    974 * @lnum: log logical eraseblock to replay
    975 * @offs: offset to start replaying from
    976 * @sbuf: scan buffer
    977 *
    978 * This function replays a log LEB and returns zero in case of success, %1 if
    979 * this is the last LEB in the log, and a negative error code in case of
    980 * failure.
    981 */
    982static int replay_log_leb(struct ubifs_info *c, int lnum, int offs, void *sbuf)
    983{
    984	int err;
    985	struct ubifs_scan_leb *sleb;
    986	struct ubifs_scan_node *snod;
    987	const struct ubifs_cs_node *node;
    988
    989	dbg_mnt("replay log LEB %d:%d", lnum, offs);
    990	sleb = ubifs_scan(c, lnum, offs, sbuf, c->need_recovery);
    991	if (IS_ERR(sleb)) {
    992		if (PTR_ERR(sleb) != -EUCLEAN || !c->need_recovery)
    993			return PTR_ERR(sleb);
    994		/*
    995		 * Note, the below function will recover this log LEB only if
    996		 * it is the last, because unclean reboots can possibly corrupt
    997		 * only the tail of the log.
    998		 */
    999		sleb = ubifs_recover_log_leb(c, lnum, offs, sbuf);
   1000		if (IS_ERR(sleb))
   1001			return PTR_ERR(sleb);
   1002	}
   1003
   1004	if (sleb->nodes_cnt == 0) {
   1005		err = 1;
   1006		goto out;
   1007	}
   1008
   1009	node = sleb->buf;
   1010	snod = list_entry(sleb->nodes.next, struct ubifs_scan_node, list);
   1011	if (c->cs_sqnum == 0) {
   1012		/*
   1013		 * This is the first log LEB we are looking at, make sure that
   1014		 * the first node is a commit start node. Also record its
   1015		 * sequence number so that UBIFS can determine where the log
   1016		 * ends, because all nodes which were have higher sequence
   1017		 * numbers.
   1018		 */
   1019		if (snod->type != UBIFS_CS_NODE) {
   1020			ubifs_err(c, "first log node at LEB %d:%d is not CS node",
   1021				  lnum, offs);
   1022			goto out_dump;
   1023		}
   1024		if (le64_to_cpu(node->cmt_no) != c->cmt_no) {
   1025			ubifs_err(c, "first CS node at LEB %d:%d has wrong commit number %llu expected %llu",
   1026				  lnum, offs,
   1027				  (unsigned long long)le64_to_cpu(node->cmt_no),
   1028				  c->cmt_no);
   1029			goto out_dump;
   1030		}
   1031
   1032		c->cs_sqnum = le64_to_cpu(node->ch.sqnum);
   1033		dbg_mnt("commit start sqnum %llu", c->cs_sqnum);
   1034
   1035		err = ubifs_shash_init(c, c->log_hash);
   1036		if (err)
   1037			goto out;
   1038
   1039		err = ubifs_shash_update(c, c->log_hash, node, UBIFS_CS_NODE_SZ);
   1040		if (err < 0)
   1041			goto out;
   1042	}
   1043
   1044	if (snod->sqnum < c->cs_sqnum) {
   1045		/*
   1046		 * This means that we reached end of log and now
   1047		 * look to the older log data, which was already
   1048		 * committed but the eraseblock was not erased (UBIFS
   1049		 * only un-maps it). So this basically means we have to
   1050		 * exit with "end of log" code.
   1051		 */
   1052		err = 1;
   1053		goto out;
   1054	}
   1055
   1056	/* Make sure the first node sits at offset zero of the LEB */
   1057	if (snod->offs != 0) {
   1058		ubifs_err(c, "first node is not at zero offset");
   1059		goto out_dump;
   1060	}
   1061
   1062	list_for_each_entry(snod, &sleb->nodes, list) {
   1063		cond_resched();
   1064
   1065		if (snod->sqnum >= SQNUM_WATERMARK) {
   1066			ubifs_err(c, "file system's life ended");
   1067			goto out_dump;
   1068		}
   1069
   1070		if (snod->sqnum < c->cs_sqnum) {
   1071			ubifs_err(c, "bad sqnum %llu, commit sqnum %llu",
   1072				  snod->sqnum, c->cs_sqnum);
   1073			goto out_dump;
   1074		}
   1075
   1076		if (snod->sqnum > c->max_sqnum)
   1077			c->max_sqnum = snod->sqnum;
   1078
   1079		switch (snod->type) {
   1080		case UBIFS_REF_NODE: {
   1081			const struct ubifs_ref_node *ref = snod->node;
   1082
   1083			err = validate_ref(c, ref);
   1084			if (err == 1)
   1085				break; /* Already have this bud */
   1086			if (err)
   1087				goto out_dump;
   1088
   1089			err = ubifs_shash_update(c, c->log_hash, ref,
   1090						 UBIFS_REF_NODE_SZ);
   1091			if (err)
   1092				goto out;
   1093
   1094			err = add_replay_bud(c, le32_to_cpu(ref->lnum),
   1095					     le32_to_cpu(ref->offs),
   1096					     le32_to_cpu(ref->jhead),
   1097					     snod->sqnum);
   1098			if (err)
   1099				goto out;
   1100
   1101			break;
   1102		}
   1103		case UBIFS_CS_NODE:
   1104			/* Make sure it sits at the beginning of LEB */
   1105			if (snod->offs != 0) {
   1106				ubifs_err(c, "unexpected node in log");
   1107				goto out_dump;
   1108			}
   1109			break;
   1110		default:
   1111			ubifs_err(c, "unexpected node in log");
   1112			goto out_dump;
   1113		}
   1114	}
   1115
   1116	if (sleb->endpt || c->lhead_offs >= c->leb_size) {
   1117		c->lhead_lnum = lnum;
   1118		c->lhead_offs = sleb->endpt;
   1119	}
   1120
   1121	err = !sleb->endpt;
   1122out:
   1123	ubifs_scan_destroy(sleb);
   1124	return err;
   1125
   1126out_dump:
   1127	ubifs_err(c, "log error detected while replaying the log at LEB %d:%d",
   1128		  lnum, offs + snod->offs);
   1129	ubifs_dump_node(c, snod->node, c->leb_size - snod->offs);
   1130	ubifs_scan_destroy(sleb);
   1131	return -EINVAL;
   1132}
   1133
   1134/**
   1135 * take_ihead - update the status of the index head in lprops to 'taken'.
   1136 * @c: UBIFS file-system description object
   1137 *
   1138 * This function returns the amount of free space in the index head LEB or a
   1139 * negative error code.
   1140 */
   1141static int take_ihead(struct ubifs_info *c)
   1142{
   1143	const struct ubifs_lprops *lp;
   1144	int err, free;
   1145
   1146	ubifs_get_lprops(c);
   1147
   1148	lp = ubifs_lpt_lookup_dirty(c, c->ihead_lnum);
   1149	if (IS_ERR(lp)) {
   1150		err = PTR_ERR(lp);
   1151		goto out;
   1152	}
   1153
   1154	free = lp->free;
   1155
   1156	lp = ubifs_change_lp(c, lp, LPROPS_NC, LPROPS_NC,
   1157			     lp->flags | LPROPS_TAKEN, 0);
   1158	if (IS_ERR(lp)) {
   1159		err = PTR_ERR(lp);
   1160		goto out;
   1161	}
   1162
   1163	err = free;
   1164out:
   1165	ubifs_release_lprops(c);
   1166	return err;
   1167}
   1168
   1169/**
   1170 * ubifs_replay_journal - replay journal.
   1171 * @c: UBIFS file-system description object
   1172 *
   1173 * This function scans the journal, replays and cleans it up. It makes sure all
   1174 * memory data structures related to uncommitted journal are built (dirty TNC
   1175 * tree, tree of buds, modified lprops, etc).
   1176 */
   1177int ubifs_replay_journal(struct ubifs_info *c)
   1178{
   1179	int err, lnum, free;
   1180
   1181	BUILD_BUG_ON(UBIFS_TRUN_KEY > 5);
   1182
   1183	/* Update the status of the index head in lprops to 'taken' */
   1184	free = take_ihead(c);
   1185	if (free < 0)
   1186		return free; /* Error code */
   1187
   1188	if (c->ihead_offs != c->leb_size - free) {
   1189		ubifs_err(c, "bad index head LEB %d:%d", c->ihead_lnum,
   1190			  c->ihead_offs);
   1191		return -EINVAL;
   1192	}
   1193
   1194	dbg_mnt("start replaying the journal");
   1195	c->replaying = 1;
   1196	lnum = c->ltail_lnum = c->lhead_lnum;
   1197
   1198	do {
   1199		err = replay_log_leb(c, lnum, 0, c->sbuf);
   1200		if (err == 1) {
   1201			if (lnum != c->lhead_lnum)
   1202				/* We hit the end of the log */
   1203				break;
   1204
   1205			/*
   1206			 * The head of the log must always start with the
   1207			 * "commit start" node on a properly formatted UBIFS.
   1208			 * But we found no nodes at all, which means that
   1209			 * something went wrong and we cannot proceed mounting
   1210			 * the file-system.
   1211			 */
   1212			ubifs_err(c, "no UBIFS nodes found at the log head LEB %d:%d, possibly corrupted",
   1213				  lnum, 0);
   1214			err = -EINVAL;
   1215		}
   1216		if (err)
   1217			goto out;
   1218		lnum = ubifs_next_log_lnum(c, lnum);
   1219	} while (lnum != c->ltail_lnum);
   1220
   1221	err = replay_buds(c);
   1222	if (err)
   1223		goto out;
   1224
   1225	err = apply_replay_list(c);
   1226	if (err)
   1227		goto out;
   1228
   1229	err = set_buds_lprops(c);
   1230	if (err)
   1231		goto out;
   1232
   1233	/*
   1234	 * UBIFS budgeting calculations use @c->bi.uncommitted_idx variable
   1235	 * to roughly estimate index growth. Things like @c->bi.min_idx_lebs
   1236	 * depend on it. This means we have to initialize it to make sure
   1237	 * budgeting works properly.
   1238	 */
   1239	c->bi.uncommitted_idx = atomic_long_read(&c->dirty_zn_cnt);
   1240	c->bi.uncommitted_idx *= c->max_idx_node_sz;
   1241
   1242	ubifs_assert(c, c->bud_bytes <= c->max_bud_bytes || c->need_recovery);
   1243	dbg_mnt("finished, log head LEB %d:%d, max_sqnum %llu, highest_inum %lu",
   1244		c->lhead_lnum, c->lhead_offs, c->max_sqnum,
   1245		(unsigned long)c->highest_inum);
   1246out:
   1247	destroy_replay_list(c);
   1248	destroy_bud_list(c);
   1249	c->replaying = 0;
   1250	return err;
   1251}