cachepc-linux

Fork of AMDESE/linux with modifications for CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux
Log | Files | Refs | README | LICENSE | sfeed.txt

eba.c (47060B)


      1// SPDX-License-Identifier: GPL-2.0-or-later
      2/*
      3 * Copyright (c) International Business Machines Corp., 2006
      4 *
      5 * Author: Artem Bityutskiy (Битюцкий Артём)
      6 */
      7
      8/*
      9 * The UBI Eraseblock Association (EBA) sub-system.
     10 *
     11 * This sub-system is responsible for I/O to/from logical eraseblock.
     12 *
     13 * Although in this implementation the EBA table is fully kept and managed in
     14 * RAM, which assumes poor scalability, it might be (partially) maintained on
     15 * flash in future implementations.
     16 *
     17 * The EBA sub-system implements per-logical eraseblock locking. Before
     18 * accessing a logical eraseblock it is locked for reading or writing. The
     19 * per-logical eraseblock locking is implemented by means of the lock tree. The
     20 * lock tree is an RB-tree which refers all the currently locked logical
     21 * eraseblocks. The lock tree elements are &struct ubi_ltree_entry objects.
     22 * They are indexed by (@vol_id, @lnum) pairs.
     23 *
     24 * EBA also maintains the global sequence counter which is incremented each
     25 * time a logical eraseblock is mapped to a physical eraseblock and it is
     26 * stored in the volume identifier header. This means that each VID header has
     27 * a unique sequence number. The sequence number is only increased and we assume
     28 * 64 bits is enough to never overflow.
     29 */
     30
     31#include <linux/slab.h>
     32#include <linux/crc32.h>
     33#include <linux/err.h>
     34#include "ubi.h"
     35
     36/* Number of physical eraseblocks reserved for atomic LEB change operation */
     37#define EBA_RESERVED_PEBS 1
     38
/**
 * struct ubi_eba_entry - structure encoding a single LEB -> PEB association
 * @pnum: the physical eraseblock number attached to the LEB
 *
 * This structure is encoding a LEB -> PEB association. Note that the LEB
 * number is not stored here, because it is the index used to access the
 * entries table.
 */
struct ubi_eba_entry {
	int pnum;	/* PEB number, or %UBI_LEB_UNMAPPED when not mapped */
};
     50
/**
 * struct ubi_eba_table - LEB -> PEB association information
 * @entries: the LEB to PEB mapping (one entry per LEB).
 *
 * This structure is private to the EBA logic and should be kept here.
 * It is encoding the LEB to PEB association table, and is subject to
 * changes.
 */
struct ubi_eba_table {
	struct ubi_eba_entry *entries;	/* indexed directly by LEB number */
};
     62
     63/**
     64 * next_sqnum - get next sequence number.
     65 * @ubi: UBI device description object
     66 *
     67 * This function returns next sequence number to use, which is just the current
     68 * global sequence counter value. It also increases the global sequence
     69 * counter.
     70 */
     71unsigned long long ubi_next_sqnum(struct ubi_device *ubi)
     72{
     73	unsigned long long sqnum;
     74
     75	spin_lock(&ubi->ltree_lock);
     76	sqnum = ubi->global_sqnum++;
     77	spin_unlock(&ubi->ltree_lock);
     78
     79	return sqnum;
     80}
     81
     82/**
     83 * ubi_get_compat - get compatibility flags of a volume.
     84 * @ubi: UBI device description object
     85 * @vol_id: volume ID
     86 *
     87 * This function returns compatibility flags for an internal volume. User
     88 * volumes have no compatibility flags, so %0 is returned.
     89 */
     90static int ubi_get_compat(const struct ubi_device *ubi, int vol_id)
     91{
     92	if (vol_id == UBI_LAYOUT_VOLUME_ID)
     93		return UBI_LAYOUT_VOLUME_COMPAT;
     94	return 0;
     95}
     96
     97/**
     98 * ubi_eba_get_ldesc - get information about a LEB
     99 * @vol: volume description object
    100 * @lnum: logical eraseblock number
    101 * @ldesc: the LEB descriptor to fill
    102 *
    103 * Used to query information about a specific LEB.
    104 * It is currently only returning the physical position of the LEB, but will be
    105 * extended to provide more information.
    106 */
    107void ubi_eba_get_ldesc(struct ubi_volume *vol, int lnum,
    108		       struct ubi_eba_leb_desc *ldesc)
    109{
    110	ldesc->lnum = lnum;
    111	ldesc->pnum = vol->eba_tbl->entries[lnum].pnum;
    112}
    113
    114/**
    115 * ubi_eba_create_table - allocate a new EBA table and initialize it with all
    116 *			  LEBs unmapped
    117 * @vol: volume containing the EBA table to copy
    118 * @nentries: number of entries in the table
    119 *
    120 * Allocate a new EBA table and initialize it with all LEBs unmapped.
    121 * Returns a valid pointer if it succeed, an ERR_PTR() otherwise.
    122 */
    123struct ubi_eba_table *ubi_eba_create_table(struct ubi_volume *vol,
    124					   int nentries)
    125{
    126	struct ubi_eba_table *tbl;
    127	int err = -ENOMEM;
    128	int i;
    129
    130	tbl = kzalloc(sizeof(*tbl), GFP_KERNEL);
    131	if (!tbl)
    132		return ERR_PTR(-ENOMEM);
    133
    134	tbl->entries = kmalloc_array(nentries, sizeof(*tbl->entries),
    135				     GFP_KERNEL);
    136	if (!tbl->entries)
    137		goto err;
    138
    139	for (i = 0; i < nentries; i++)
    140		tbl->entries[i].pnum = UBI_LEB_UNMAPPED;
    141
    142	return tbl;
    143
    144err:
    145	kfree(tbl);
    146
    147	return ERR_PTR(err);
    148}
    149
    150/**
    151 * ubi_eba_destroy_table - destroy an EBA table
    152 * @tbl: the table to destroy
    153 *
    154 * Destroy an EBA table.
    155 */
    156void ubi_eba_destroy_table(struct ubi_eba_table *tbl)
    157{
    158	if (!tbl)
    159		return;
    160
    161	kfree(tbl->entries);
    162	kfree(tbl);
    163}
    164
    165/**
    166 * ubi_eba_copy_table - copy the EBA table attached to vol into another table
    167 * @vol: volume containing the EBA table to copy
    168 * @dst: destination
    169 * @nentries: number of entries to copy
    170 *
    171 * Copy the EBA table stored in vol into the one pointed by dst.
    172 */
    173void ubi_eba_copy_table(struct ubi_volume *vol, struct ubi_eba_table *dst,
    174			int nentries)
    175{
    176	struct ubi_eba_table *src;
    177	int i;
    178
    179	ubi_assert(dst && vol && vol->eba_tbl);
    180
    181	src = vol->eba_tbl;
    182
    183	for (i = 0; i < nentries; i++)
    184		dst->entries[i].pnum = src->entries[i].pnum;
    185}
    186
    187/**
    188 * ubi_eba_replace_table - assign a new EBA table to a volume
    189 * @vol: volume containing the EBA table to copy
    190 * @tbl: new EBA table
    191 *
    192 * Assign a new EBA table to the volume and release the old one.
    193 */
    194void ubi_eba_replace_table(struct ubi_volume *vol, struct ubi_eba_table *tbl)
    195{
    196	ubi_eba_destroy_table(vol->eba_tbl);
    197	vol->eba_tbl = tbl;
    198}
    199
    200/**
    201 * ltree_lookup - look up the lock tree.
    202 * @ubi: UBI device description object
    203 * @vol_id: volume ID
    204 * @lnum: logical eraseblock number
    205 *
    206 * This function returns a pointer to the corresponding &struct ubi_ltree_entry
    207 * object if the logical eraseblock is locked and %NULL if it is not.
    208 * @ubi->ltree_lock has to be locked.
    209 */
    210static struct ubi_ltree_entry *ltree_lookup(struct ubi_device *ubi, int vol_id,
    211					    int lnum)
    212{
    213	struct rb_node *p;
    214
    215	p = ubi->ltree.rb_node;
    216	while (p) {
    217		struct ubi_ltree_entry *le;
    218
    219		le = rb_entry(p, struct ubi_ltree_entry, rb);
    220
    221		if (vol_id < le->vol_id)
    222			p = p->rb_left;
    223		else if (vol_id > le->vol_id)
    224			p = p->rb_right;
    225		else {
    226			if (lnum < le->lnum)
    227				p = p->rb_left;
    228			else if (lnum > le->lnum)
    229				p = p->rb_right;
    230			else
    231				return le;
    232		}
    233	}
    234
    235	return NULL;
    236}
    237
/**
 * ltree_add_entry - add new entry to the lock tree.
 * @ubi: UBI device description object
 * @vol_id: volume ID
 * @lnum: logical eraseblock number
 *
 * This function adds new entry for logical eraseblock (@vol_id, @lnum) to the
 * lock tree. If such entry is already there, its usage counter is increased.
 * Returns pointer to the lock tree entry or %-ENOMEM if memory allocation
 * failed.
 */
static struct ubi_ltree_entry *ltree_add_entry(struct ubi_device *ubi,
					       int vol_id, int lnum)
{
	struct ubi_ltree_entry *le, *le1, *le_free;

	/*
	 * Allocate speculatively before taking @ubi->ltree_lock: the
	 * allocation may sleep, while the lookup-and-insert below must
	 * happen atomically under the spinlock.
	 */
	le = kmalloc(sizeof(struct ubi_ltree_entry), GFP_NOFS);
	if (!le)
		return ERR_PTR(-ENOMEM);

	le->users = 0;
	init_rwsem(&le->mutex);
	le->vol_id = vol_id;
	le->lnum = lnum;

	spin_lock(&ubi->ltree_lock);
	le1 = ltree_lookup(ubi, vol_id, lnum);

	if (le1) {
		/*
		 * This logical eraseblock is already locked. The newly
		 * allocated lock entry is not needed.
		 */
		le_free = le;
		le = le1;
	} else {
		struct rb_node **p, *parent = NULL;

		/*
		 * No lock entry, add the newly allocated one to the
		 * @ubi->ltree RB-tree.
		 */
		le_free = NULL;

		p = &ubi->ltree.rb_node;
		while (*p) {
			parent = *p;
			le1 = rb_entry(parent, struct ubi_ltree_entry, rb);

			if (vol_id < le1->vol_id)
				p = &(*p)->rb_left;
			else if (vol_id > le1->vol_id)
				p = &(*p)->rb_right;
			else {
				/* Same volume: order by LEB number */
				ubi_assert(lnum != le1->lnum);
				if (lnum < le1->lnum)
					p = &(*p)->rb_left;
				else
					p = &(*p)->rb_right;
			}
		}

		rb_link_node(&le->rb, parent, p);
		rb_insert_color(&le->rb, &ubi->ltree);
	}
	le->users += 1;
	spin_unlock(&ubi->ltree_lock);

	/* Free the unused pre-allocated entry outside of the spinlock */
	kfree(le_free);
	return le;
}
    309
    310/**
    311 * leb_read_lock - lock logical eraseblock for reading.
    312 * @ubi: UBI device description object
    313 * @vol_id: volume ID
    314 * @lnum: logical eraseblock number
    315 *
    316 * This function locks a logical eraseblock for reading. Returns zero in case
    317 * of success and a negative error code in case of failure.
    318 */
    319static int leb_read_lock(struct ubi_device *ubi, int vol_id, int lnum)
    320{
    321	struct ubi_ltree_entry *le;
    322
    323	le = ltree_add_entry(ubi, vol_id, lnum);
    324	if (IS_ERR(le))
    325		return PTR_ERR(le);
    326	down_read(&le->mutex);
    327	return 0;
    328}
    329
/**
 * leb_read_unlock - unlock logical eraseblock.
 * @ubi: UBI device description object
 * @vol_id: volume ID
 * @lnum: logical eraseblock number
 */
static void leb_read_unlock(struct ubi_device *ubi, int vol_id, int lnum)
{
	struct ubi_ltree_entry *le;

	spin_lock(&ubi->ltree_lock);
	le = ltree_lookup(ubi, vol_id, lnum);
	le->users -= 1;
	ubi_assert(le->users >= 0);
	up_read(&le->mutex);
	/*
	 * Drop the entry from the lock tree and free it once the last user
	 * is gone. Freeing is safe here because new users can only find the
	 * entry via ltree_lookup(), which requires @ubi->ltree_lock - still
	 * held at this point.
	 */
	if (le->users == 0) {
		rb_erase(&le->rb, &ubi->ltree);
		kfree(le);
	}
	spin_unlock(&ubi->ltree_lock);
}
    351
    352/**
    353 * leb_write_lock - lock logical eraseblock for writing.
    354 * @ubi: UBI device description object
    355 * @vol_id: volume ID
    356 * @lnum: logical eraseblock number
    357 *
    358 * This function locks a logical eraseblock for writing. Returns zero in case
    359 * of success and a negative error code in case of failure.
    360 */
    361static int leb_write_lock(struct ubi_device *ubi, int vol_id, int lnum)
    362{
    363	struct ubi_ltree_entry *le;
    364
    365	le = ltree_add_entry(ubi, vol_id, lnum);
    366	if (IS_ERR(le))
    367		return PTR_ERR(le);
    368	down_write(&le->mutex);
    369	return 0;
    370}
    371
/**
 * leb_write_trylock - try to lock logical eraseblock for writing.
 * @ubi: UBI device description object
 * @vol_id: volume ID
 * @lnum: logical eraseblock number
 *
 * This function locks a logical eraseblock for writing if there is no
 * contention and does nothing if there is contention. Returns %0 in case of
 * success, %1 in case of contention, and a negative error code in case of
 * failure.
 */
static int leb_write_trylock(struct ubi_device *ubi, int vol_id, int lnum)
{
	struct ubi_ltree_entry *le;

	le = ltree_add_entry(ubi, vol_id, lnum);
	if (IS_ERR(le))
		return PTR_ERR(le);
	if (down_write_trylock(&le->mutex))
		return 0;

	/* Contention, cancel */
	/*
	 * Undo the reference taken by ltree_add_entry() and remove the
	 * entry from the tree if we were its only user.
	 */
	spin_lock(&ubi->ltree_lock);
	le->users -= 1;
	ubi_assert(le->users >= 0);
	if (le->users == 0) {
		rb_erase(&le->rb, &ubi->ltree);
		kfree(le);
	}
	spin_unlock(&ubi->ltree_lock);

	return 1;
}
    405
/**
 * leb_write_unlock - unlock logical eraseblock.
 * @ubi: UBI device description object
 * @vol_id: volume ID
 * @lnum: logical eraseblock number
 */
static void leb_write_unlock(struct ubi_device *ubi, int vol_id, int lnum)
{
	struct ubi_ltree_entry *le;

	spin_lock(&ubi->ltree_lock);
	le = ltree_lookup(ubi, vol_id, lnum);
	le->users -= 1;
	ubi_assert(le->users >= 0);
	up_write(&le->mutex);
	/*
	 * Remove and free the entry once the last user is gone - safe
	 * because lookups require @ubi->ltree_lock, still held here.
	 */
	if (le->users == 0) {
		rb_erase(&le->rb, &ubi->ltree);
		kfree(le);
	}
	spin_unlock(&ubi->ltree_lock);
}
    427
    428/**
    429 * ubi_eba_is_mapped - check if a LEB is mapped.
    430 * @vol: volume description object
    431 * @lnum: logical eraseblock number
    432 *
    433 * This function returns true if the LEB is mapped, false otherwise.
    434 */
    435bool ubi_eba_is_mapped(struct ubi_volume *vol, int lnum)
    436{
    437	return vol->eba_tbl->entries[lnum].pnum >= 0;
    438}
    439
/**
 * ubi_eba_unmap_leb - un-map logical eraseblock.
 * @ubi: UBI device description object
 * @vol: volume description object
 * @lnum: logical eraseblock number
 *
 * This function un-maps logical eraseblock @lnum and schedules corresponding
 * physical eraseblock for erasure. Returns zero in case of success and a
 * negative error code in case of failure (%-EROFS when the device is in
 * read-only mode).
 */
int ubi_eba_unmap_leb(struct ubi_device *ubi, struct ubi_volume *vol,
		      int lnum)
{
	int err, pnum, vol_id = vol->vol_id;

	if (ubi->ro_mode)
		return -EROFS;

	err = leb_write_lock(ubi, vol_id, lnum);
	if (err)
		return err;

	pnum = vol->eba_tbl->entries[lnum].pnum;
	if (pnum < 0)
		/* This logical eraseblock is already unmapped */
		goto out_unlock;

	dbg_eba("erase LEB %d:%d, PEB %d", vol_id, lnum, pnum);

	/*
	 * Clear the mapping under @ubi->fm_eba_sem (read side) -
	 * NOTE(review): presumably serializes the EBA table update against
	 * a concurrent fastmap write-out; confirm against fm_eba_sem users.
	 */
	down_read(&ubi->fm_eba_sem);
	vol->eba_tbl->entries[lnum].pnum = UBI_LEB_UNMAPPED;
	up_read(&ubi->fm_eba_sem);
	/* Hand the old PEB back to wear-leveling for erasure */
	err = ubi_wl_put_peb(ubi, vol_id, lnum, pnum, 0);

out_unlock:
	leb_write_unlock(ubi, vol_id, lnum);
	return err;
}
    478
    479#ifdef CONFIG_MTD_UBI_FASTMAP
/**
 * check_mapping - check and fixup a mapping
 * @ubi: UBI device description object
 * @vol: volume description object
 * @lnum: logical eraseblock number
 * @pnum: physical eraseblock number
 *
 * Checks whether a given mapping is valid. Fastmap cannot track LEB unmap
 * operations, if such an operation is interrupted the mapping still looks
 * good, but upon first read an ECC is reported to the upper layer.
 * Normally during the full-scan at attach time this is fixed, for Fastmap
 * we have to deal with it while reading.
 * If the PEB behind a LEB shows this symptom we change the mapping to
 * %UBI_LEB_UNMAPPED and schedule the PEB for erasure.
 *
 * Returns 0 on success, negative error code in case of failure.
 */
static int check_mapping(struct ubi_device *ubi, struct ubi_volume *vol, int lnum,
			 int *pnum)
{
	int err;
	struct ubi_vid_io_buf *vidb;
	struct ubi_vid_hdr *vid_hdr;

	/* Only needed after a fastmap attach, and only once per LEB */
	if (!ubi->fast_attach)
		return 0;

	if (!vol->checkmap || test_bit(lnum, vol->checkmap))
		return 0;

	vidb = ubi_alloc_vid_buf(ubi, GFP_NOFS);
	if (!vidb)
		return -ENOMEM;

	err = ubi_io_read_vid_hdr(ubi, *pnum, vidb, 0);
	if (err > 0 && err != UBI_IO_BITFLIPS) {
		int torture = 0;

		/* Only the "empty or bad header" outcomes are expected here */
		switch (err) {
			case UBI_IO_FF:
			case UBI_IO_FF_BITFLIPS:
			case UBI_IO_BAD_HDR:
			case UBI_IO_BAD_HDR_EBADMSG:
				break;
			default:
				ubi_assert(0);
		}

		/* Bitflips/ECC trouble: torture-test the PEB before reuse */
		if (err == UBI_IO_BAD_HDR_EBADMSG || err == UBI_IO_FF_BITFLIPS)
			torture = 1;

		down_read(&ubi->fm_eba_sem);
		vol->eba_tbl->entries[lnum].pnum = UBI_LEB_UNMAPPED;
		up_read(&ubi->fm_eba_sem);
		ubi_wl_put_peb(ubi, vol->vol_id, lnum, *pnum, torture);

		*pnum = UBI_LEB_UNMAPPED;
	} else if (err < 0) {
		ubi_err(ubi, "unable to read VID header back from PEB %i: %i",
			*pnum, err);

		goto out_free;
	} else {
		int found_vol_id, found_lnum;

		ubi_assert(err == 0 || err == UBI_IO_BITFLIPS);

		/* Header read fine - verify it really belongs to this LEB */
		vid_hdr = ubi_get_vid_hdr(vidb);
		found_vol_id = be32_to_cpu(vid_hdr->vol_id);
		found_lnum = be32_to_cpu(vid_hdr->lnum);

		if (found_lnum != lnum || found_vol_id != vol->vol_id) {
			ubi_err(ubi, "EBA mismatch! PEB %i is LEB %i:%i instead of LEB %i:%i",
				*pnum, found_vol_id, found_lnum, vol->vol_id, lnum);
			ubi_ro_mode(ubi);
			err = -EINVAL;
			goto out_free;
		}
	}

	/* Mark this LEB as verified so the check runs at most once */
	set_bit(lnum, vol->checkmap);
	err = 0;

out_free:
	ubi_free_vid_buf(vidb);

	return err;
}
    568#else
static int check_mapping(struct ubi_device *ubi, struct ubi_volume *vol, int lnum,
		  int *pnum)
{
	/* Without Fastmap the full attach scan validates every mapping */
	return 0;
}
    574#endif
    575
/**
 * ubi_eba_read_leb - read data.
 * @ubi: UBI device description object
 * @vol: volume description object
 * @lnum: logical eraseblock number
 * @buf: buffer to store the read data
 * @offset: offset from where to read
 * @len: how many bytes to read
 * @check: data CRC check flag
 *
 * If the logical eraseblock @lnum is unmapped, @buf is filled with 0xFF
 * bytes. The @check flag only makes sense for static volumes and forces
 * eraseblock data CRC checking.
 *
 * In case of success this function returns zero. In case of a static volume,
 * if data CRC mismatches - %-EBADMSG is returned. %-EBADMSG may also be
 * returned for any volume type if an ECC error was detected by the MTD device
 * driver. Other negative error codes may be returned in case of other errors.
 */
int ubi_eba_read_leb(struct ubi_device *ubi, struct ubi_volume *vol, int lnum,
		     void *buf, int offset, int len, int check)
{
	int err, pnum, scrub = 0, vol_id = vol->vol_id;
	struct ubi_vid_io_buf *vidb;
	struct ubi_vid_hdr *vid_hdr;
	uint32_t crc;

	err = leb_read_lock(ubi, vol_id, lnum);
	if (err)
		return err;

	pnum = vol->eba_tbl->entries[lnum].pnum;
	if (pnum >= 0) {
		/* May un-map the LEB on Fastmap devices (see check_mapping) */
		err = check_mapping(ubi, vol, lnum, &pnum);
		if (err < 0)
			goto out_unlock;
	}

	if (pnum == UBI_LEB_UNMAPPED) {
		/*
		 * The logical eraseblock is not mapped, fill the whole buffer
		 * with 0xFF bytes. The exception is static volumes for which
		 * it is an error to read unmapped logical eraseblocks.
		 */
		dbg_eba("read %d bytes from offset %d of LEB %d:%d (unmapped)",
			len, offset, vol_id, lnum);
		leb_read_unlock(ubi, vol_id, lnum);
		ubi_assert(vol->vol_type != UBI_STATIC_VOLUME);
		memset(buf, 0xFF, len);
		return 0;
	}

	dbg_eba("read %d bytes from offset %d of LEB %d:%d, PEB %d",
		len, offset, vol_id, lnum, pnum);

	/* CRC checking only makes sense for static volumes */
	if (vol->vol_type == UBI_DYNAMIC_VOLUME)
		check = 0;

retry:
	if (check) {
		/* Fetch the VID header to learn the expected data CRC/size */
		vidb = ubi_alloc_vid_buf(ubi, GFP_NOFS);
		if (!vidb) {
			err = -ENOMEM;
			goto out_unlock;
		}

		vid_hdr = ubi_get_vid_hdr(vidb);

		err = ubi_io_read_vid_hdr(ubi, pnum, vidb, 1);
		if (err && err != UBI_IO_BITFLIPS) {
			if (err > 0) {
				/*
				 * The header is either absent or corrupted.
				 * The former case means there is a bug -
				 * switch to read-only mode just in case.
				 * The latter case means a real corruption - we
				 * may try to recover data. FIXME: but this is
				 * not implemented.
				 */
				if (err == UBI_IO_BAD_HDR_EBADMSG ||
				    err == UBI_IO_BAD_HDR) {
					ubi_warn(ubi, "corrupted VID header at PEB %d, LEB %d:%d",
						 pnum, vol_id, lnum);
					err = -EBADMSG;
				} else {
					/*
					 * Ending up here in the non-Fastmap case
					 * is a clear bug as the VID header had to
					 * be present at scan time to have it referenced.
					 * With fastmap the story is more complicated.
					 * Fastmap has the mapping info without the need
					 * of a full scan. So the LEB could have been
					 * unmapped, Fastmap cannot know this and keeps
					 * the LEB referenced.
					 * This is valid and works as the layer above UBI
					 * has to do bookkeeping about used/referenced
					 * LEBs in any case.
					 */
					if (ubi->fast_attach) {
						err = -EBADMSG;
					} else {
						err = -EINVAL;
						ubi_ro_mode(ubi);
					}
				}
			}
			goto out_free;
		} else if (err == UBI_IO_BITFLIPS)
			scrub = 1;

		ubi_assert(lnum < be32_to_cpu(vid_hdr->used_ebs));
		ubi_assert(len == be32_to_cpu(vid_hdr->data_size));

		crc = be32_to_cpu(vid_hdr->data_crc);
		ubi_free_vid_buf(vidb);
	}

	err = ubi_io_read_data(ubi, buf, pnum, offset, len);
	if (err) {
		if (err == UBI_IO_BITFLIPS)
			scrub = 1;
		else if (mtd_is_eccerr(err)) {
			if (vol->vol_type == UBI_DYNAMIC_VOLUME)
				goto out_unlock;
			scrub = 1;
			/*
			 * ECC error on a static volume: re-read once with
			 * CRC checking forced on before giving up.
			 */
			if (!check) {
				ubi_msg(ubi, "force data checking");
				check = 1;
				goto retry;
			}
		} else
			goto out_unlock;
	}

	if (check) {
		uint32_t crc1 = crc32(UBI_CRC32_INIT, buf, len);
		if (crc1 != crc) {
			ubi_warn(ubi, "CRC error: calculated %#08x, must be %#08x",
				 crc1, crc);
			err = -EBADMSG;
			goto out_unlock;
		}
	}

	/* Bitflips were seen: ask wear-leveling to scrub this PEB */
	if (scrub)
		err = ubi_wl_scrub_peb(ubi, pnum);

	leb_read_unlock(ubi, vol_id, lnum);
	return err;

out_free:
	ubi_free_vid_buf(vidb);
out_unlock:
	leb_read_unlock(ubi, vol_id, lnum);
	return err;
}
    732
/**
 * ubi_eba_read_leb_sg - read data into a scatter gather list.
 * @ubi: UBI device description object
 * @vol: volume description object
 * @sgl: UBI scatter gather list to store the read data
 * @lnum: logical eraseblock number
 * @offset: offset from where to read
 * @len: how many bytes to read
 * @check: data CRC check flag
 *
 * This function works exactly like ubi_eba_read_leb(). But instead of
 * storing the read data into a buffer it writes to an UBI scatter gather
 * list.
 */
int ubi_eba_read_leb_sg(struct ubi_device *ubi, struct ubi_volume *vol,
			struct ubi_sgl *sgl, int lnum, int offset, int len,
			int check)
{
	int to_read;
	int ret;
	struct scatterlist *sg;

	for (;;) {
		/* Read at most what fits into the current sg element */
		ubi_assert(sgl->list_pos < UBI_MAX_SG_COUNT);
		sg = &sgl->sg[sgl->list_pos];
		if (len < sg->length - sgl->page_pos)
			to_read = len;
		else
			to_read = sg->length - sgl->page_pos;

		ret = ubi_eba_read_leb(ubi, vol, lnum,
				       sg_virt(sg) + sgl->page_pos, offset,
				       to_read, check);
		if (ret < 0)
			return ret;

		offset += to_read;
		len -= to_read;
		if (!len) {
			/* Done: advance the list position for the next call */
			sgl->page_pos += to_read;
			if (sgl->page_pos == sg->length) {
				sgl->list_pos++;
				sgl->page_pos = 0;
			}

			break;
		}

		/* Current sg element is full, continue with the next one */
		sgl->list_pos++;
		sgl->page_pos = 0;
	}

	return ret;
}
    787
/**
 * try_recover_peb - try to recover from write failure.
 * @vol: volume description object
 * @pnum: the physical eraseblock to recover
 * @lnum: logical eraseblock number
 * @buf: data which was not written because of the write failure
 * @offset: offset of the failed write
 * @len: how many bytes should have been written
 * @vidb: VID buffer
 * @retry: whether the caller should retry in case of failure
 *
 * This function is called in case of a write failure and moves all good data
 * from the potentially bad physical eraseblock to a good physical eraseblock.
 * This function also writes the data which was not written due to the failure.
 * Returns 0 in case of success, and a negative error code in case of failure.
 * In case of failure, the @retry parameter is set to false if this is a fatal
 * error (retrying won't help), and true otherwise.
 *
 * NOTE(review): this function releases @ubi->fm_eba_sem (read side) on every
 * exit path but never takes it - presumably it is acquired on the caller's
 * side / within ubi_wl_get_peb(); confirm against the callers.
 */
static int try_recover_peb(struct ubi_volume *vol, int pnum, int lnum,
			   const void *buf, int offset, int len,
			   struct ubi_vid_io_buf *vidb, bool *retry)
{
	struct ubi_device *ubi = vol->ubi;
	struct ubi_vid_hdr *vid_hdr;
	int new_pnum, err, vol_id = vol->vol_id, data_size;
	uint32_t crc;

	*retry = false;

	new_pnum = ubi_wl_get_peb(ubi);
	if (new_pnum < 0) {
		err = new_pnum;
		goto out_put;
	}

	ubi_msg(ubi, "recover PEB %d, move data to PEB %d",
		pnum, new_pnum);

	err = ubi_io_read_vid_hdr(ubi, pnum, vidb, 1);
	if (err && err != UBI_IO_BITFLIPS) {
		if (err > 0)
			err = -EIO;
		goto out_put;
	}

	vid_hdr = ubi_get_vid_hdr(vidb);
	ubi_assert(vid_hdr->vol_type == UBI_VID_DYNAMIC);

	mutex_lock(&ubi->buf_mutex);
	/* Pre-fill the failed region with 0xFF in the bounce buffer */
	memset(ubi->peb_buf + offset, 0xFF, len);

	/* Read everything before the area where the write failure happened */
	if (offset > 0) {
		err = ubi_io_read_data(ubi, ubi->peb_buf, pnum, 0, offset);
		if (err && err != UBI_IO_BITFLIPS)
			goto out_unlock;
	}

	/* From here on, failures are worth retrying with a fresh PEB */
	*retry = true;

	memcpy(ubi->peb_buf + offset, buf, len);

	/* Re-CRC the whole recovered data and stamp a fresh VID header */
	data_size = offset + len;
	crc = crc32(UBI_CRC32_INIT, ubi->peb_buf, data_size);
	vid_hdr->sqnum = cpu_to_be64(ubi_next_sqnum(ubi));
	vid_hdr->copy_flag = 1;
	vid_hdr->data_size = cpu_to_be32(data_size);
	vid_hdr->data_crc = cpu_to_be32(crc);
	err = ubi_io_write_vid_hdr(ubi, new_pnum, vidb);
	if (err)
		goto out_unlock;

	err = ubi_io_write_data(ubi, ubi->peb_buf, new_pnum, 0, data_size);

out_unlock:
	mutex_unlock(&ubi->buf_mutex);

	if (!err)
		vol->eba_tbl->entries[lnum].pnum = new_pnum;

out_put:
	up_read(&ubi->fm_eba_sem);

	if (!err) {
		/* Success: retire the old, potentially bad PEB (tortured) */
		ubi_wl_put_peb(ubi, vol_id, lnum, pnum, 1);
		ubi_msg(ubi, "data was successfully recovered");
	} else if (new_pnum >= 0) {
		/*
		 * Bad luck? This physical eraseblock is bad too? Crud. Let's
		 * try to get another one.
		 */
		ubi_wl_put_peb(ubi, vol_id, lnum, new_pnum, 1);
		ubi_warn(ubi, "failed to write to PEB %d", new_pnum);
	}

	return err;
}
    885
    886/**
    887 * recover_peb - recover from write failure.
    888 * @ubi: UBI device description object
    889 * @pnum: the physical eraseblock to recover
    890 * @vol_id: volume ID
    891 * @lnum: logical eraseblock number
    892 * @buf: data which was not written because of the write failure
    893 * @offset: offset of the failed write
    894 * @len: how many bytes should have been written
    895 *
    896 * This function is called in case of a write failure and moves all good data
    897 * from the potentially bad physical eraseblock to a good physical eraseblock.
    898 * This function also writes the data which was not written due to the failure.
    899 * Returns 0 in case of success, and a negative error code in case of failure.
    900 * This function tries %UBI_IO_RETRIES before giving up.
    901 */
    902static int recover_peb(struct ubi_device *ubi, int pnum, int vol_id, int lnum,
    903		       const void *buf, int offset, int len)
    904{
    905	int err, idx = vol_id2idx(ubi, vol_id), tries;
    906	struct ubi_volume *vol = ubi->volumes[idx];
    907	struct ubi_vid_io_buf *vidb;
    908
    909	vidb = ubi_alloc_vid_buf(ubi, GFP_NOFS);
    910	if (!vidb)
    911		return -ENOMEM;
    912
    913	for (tries = 0; tries <= UBI_IO_RETRIES; tries++) {
    914		bool retry;
    915
    916		err = try_recover_peb(vol, pnum, lnum, buf, offset, len, vidb,
    917				      &retry);
    918		if (!err || !retry)
    919			break;
    920
    921		ubi_msg(ubi, "try again");
    922	}
    923
    924	ubi_free_vid_buf(vidb);
    925
    926	return err;
    927}
    928
/**
 * try_write_vid_and_data - try to write VID header and data to a new PEB.
 * @vol: volume description object
 * @lnum: logical eraseblock number
 * @vidb: the VID buffer to write
 * @buf: buffer containing the data
 * @offset: where to start writing data
 * @len: how many bytes should be written
 *
 * This function tries to write VID header and data belonging to logical
 * eraseblock @lnum of volume @vol to a new physical eraseblock. Returns zero
 * in case of success and a negative error code in case of failure.
 * In case of error, it is possible that something was still written to the
 * flash media, but may be some garbage.
 *
 * NOTE(review): @ubi->fm_eba_sem is released (read side) on every exit path
 * but never taken in this function - presumably acquired by ubi_wl_get_peb()
 * or the caller; confirm against the callers.
 */
static int try_write_vid_and_data(struct ubi_volume *vol, int lnum,
				  struct ubi_vid_io_buf *vidb, const void *buf,
				  int offset, int len)
{
	struct ubi_device *ubi = vol->ubi;
	int pnum, opnum, err, vol_id = vol->vol_id;

	pnum = ubi_wl_get_peb(ubi);
	if (pnum < 0) {
		err = pnum;
		goto out_put;
	}

	/* Remember the old mapping so the old PEB can be retired on success */
	opnum = vol->eba_tbl->entries[lnum].pnum;

	dbg_eba("write VID hdr and %d bytes at offset %d of LEB %d:%d, PEB %d",
		len, offset, vol_id, lnum, pnum);

	err = ubi_io_write_vid_hdr(ubi, pnum, vidb);
	if (err) {
		ubi_warn(ubi, "failed to write VID header to LEB %d:%d, PEB %d",
			 vol_id, lnum, pnum);
		goto out_put;
	}

	if (len) {
		err = ubi_io_write_data(ubi, buf, pnum, offset, len);
		if (err) {
			ubi_warn(ubi,
				 "failed to write %d bytes at offset %d of LEB %d:%d, PEB %d",
				 len, offset, vol_id, lnum, pnum);
			goto out_put;
		}
	}

	vol->eba_tbl->entries[lnum].pnum = pnum;

out_put:
	up_read(&ubi->fm_eba_sem);

	/*
	 * On failure, retire the new PEB (with torture); on success retire
	 * the previously mapped PEB, if there was one.
	 */
	if (err && pnum >= 0)
		err = ubi_wl_put_peb(ubi, vol_id, lnum, pnum, 1);
	else if (!err && opnum >= 0)
		err = ubi_wl_put_peb(ubi, vol_id, lnum, opnum, 0);

	return err;
}
    991
/**
 * ubi_eba_write_leb - write data to dynamic volume.
 * @ubi: UBI device description object
 * @vol: volume description object
 * @lnum: logical eraseblock number
 * @buf: the data to write
 * @offset: offset within the logical eraseblock where to write
 * @len: how many bytes to write
 *
 * This function writes data to logical eraseblock @lnum of a dynamic volume
 * @vol. Returns zero in case of success and a negative error code in case
 * of failure. In case of error, it is possible that something was still
 * written to the flash media, but may be some garbage.
 * This function retries %UBI_IO_RETRIES times before giving up.
 */
int ubi_eba_write_leb(struct ubi_device *ubi, struct ubi_volume *vol, int lnum,
		      const void *buf, int offset, int len)
{
	int err, pnum, tries, vol_id = vol->vol_id;
	struct ubi_vid_io_buf *vidb;
	struct ubi_vid_hdr *vid_hdr;

	if (ubi->ro_mode)
		return -EROFS;

	/* Serialize against other I/O on this LEB */
	err = leb_write_lock(ubi, vol_id, lnum);
	if (err)
		return err;

	/* check_mapping() may discover the mapping is stale and clear @pnum */
	pnum = vol->eba_tbl->entries[lnum].pnum;
	if (pnum >= 0) {
		err = check_mapping(ubi, vol, lnum, &pnum);
		if (err < 0)
			goto out;
	}

	if (pnum >= 0) {
		/* The LEB is already mapped - write the data in place */
		dbg_eba("write %d bytes at offset %d of LEB %d:%d, PEB %d",
			len, offset, vol_id, lnum, pnum);

		err = ubi_io_write_data(ubi, buf, pnum, offset, len);
		if (err) {
			ubi_warn(ubi, "failed to write data to PEB %d", pnum);
			/*
			 * -EIO may mean the PEB went bad; if the flash
			 * allows bad-block handling, move the good data to
			 * a different PEB instead of failing outright.
			 */
			if (err == -EIO && ubi->bad_allowed)
				err = recover_peb(ubi, pnum, vol_id, lnum, buf,
						  offset, len);
		}

		goto out;
	}

	/*
	 * The logical eraseblock is not mapped. We have to get a free physical
	 * eraseblock and write the volume identifier header there first.
	 */
	vidb = ubi_alloc_vid_buf(ubi, GFP_NOFS);
	if (!vidb) {
		leb_write_unlock(ubi, vol_id, lnum);
		return -ENOMEM;
	}

	vid_hdr = ubi_get_vid_hdr(vidb);

	vid_hdr->vol_type = UBI_VID_DYNAMIC;
	vid_hdr->sqnum = cpu_to_be64(ubi_next_sqnum(ubi));
	vid_hdr->vol_id = cpu_to_be32(vol_id);
	vid_hdr->lnum = cpu_to_be32(lnum);
	vid_hdr->compat = ubi_get_compat(ubi, vol_id);
	vid_hdr->data_pad = cpu_to_be32(vol->data_pad);

	for (tries = 0; tries <= UBI_IO_RETRIES; tries++) {
		err = try_write_vid_and_data(vol, lnum, vidb, buf, offset, len);
		if (err != -EIO || !ubi->bad_allowed)
			break;

		/*
		 * Fortunately, this is the first write operation to this
		 * physical eraseblock, so just put it and request a new one.
		 * We assume that if this physical eraseblock went bad, the
		 * erase code will handle that.
		 */
		vid_hdr->sqnum = cpu_to_be64(ubi_next_sqnum(ubi));
		ubi_msg(ubi, "try another PEB");
	}

	ubi_free_vid_buf(vidb);

out:
	/* Any unrecovered error is fatal: switch the device to read-only */
	if (err)
		ubi_ro_mode(ubi);

	leb_write_unlock(ubi, vol_id, lnum);

	return err;
}
   1087
/**
 * ubi_eba_write_leb_st - write data to static volume.
 * @ubi: UBI device description object
 * @vol: volume description object
 * @lnum: logical eraseblock number
 * @buf: data to write
 * @len: how many bytes to write
 * @used_ebs: how many logical eraseblocks will this volume contain
 *
 * This function writes data to logical eraseblock @lnum of static volume
 * @vol. The @used_ebs argument should contain total number of logical
 * eraseblock in this static volume.
 *
 * When writing to the last logical eraseblock, the @len argument doesn't have
 * to be aligned to the minimal I/O unit size. Instead, it has to be equivalent
 * to the real data size, although the @buf buffer has to contain the
 * alignment. In all other cases, @len has to be aligned.
 *
 * It is prohibited to write more than once to logical eraseblocks of static
 * volumes. This function returns zero in case of success and a negative error
 * code in case of failure.
 */
int ubi_eba_write_leb_st(struct ubi_device *ubi, struct ubi_volume *vol,
			 int lnum, const void *buf, int len, int used_ebs)
{
	int err, tries, data_size = len, vol_id = vol->vol_id;
	struct ubi_vid_io_buf *vidb;
	struct ubi_vid_hdr *vid_hdr;
	uint32_t crc;

	if (ubi->ro_mode)
		return -EROFS;

	if (lnum == used_ebs - 1)
		/* If this is the last LEB @len may be unaligned */
		len = ALIGN(data_size, ubi->min_io_size);
	else
		ubi_assert(!(len & (ubi->min_io_size - 1)));

	vidb = ubi_alloc_vid_buf(ubi, GFP_NOFS);
	if (!vidb)
		return -ENOMEM;

	vid_hdr = ubi_get_vid_hdr(vidb);

	err = leb_write_lock(ubi, vol_id, lnum);
	if (err)
		goto out;

	vid_hdr->sqnum = cpu_to_be64(ubi_next_sqnum(ubi));
	vid_hdr->vol_id = cpu_to_be32(vol_id);
	vid_hdr->lnum = cpu_to_be32(lnum);
	vid_hdr->compat = ubi_get_compat(ubi, vol_id);
	vid_hdr->data_pad = cpu_to_be32(vol->data_pad);

	/* The CRC covers only @data_size bytes, not the alignment padding */
	crc = crc32(UBI_CRC32_INIT, buf, data_size);
	vid_hdr->vol_type = UBI_VID_STATIC;
	vid_hdr->data_size = cpu_to_be32(data_size);
	vid_hdr->used_ebs = cpu_to_be32(used_ebs);
	vid_hdr->data_crc = cpu_to_be32(crc);

	/* Static-volume LEBs are written only once, so must be unmapped */
	ubi_assert(vol->eba_tbl->entries[lnum].pnum < 0);

	for (tries = 0; tries <= UBI_IO_RETRIES; tries++) {
		err = try_write_vid_and_data(vol, lnum, vidb, buf, 0, len);
		if (err != -EIO || !ubi->bad_allowed)
			break;

		/* Each new PEB attempt needs a fresh sequence number */
		vid_hdr->sqnum = cpu_to_be64(ubi_next_sqnum(ubi));
		ubi_msg(ubi, "try another PEB");
	}

	/* Any unrecovered error is fatal: switch the device to read-only */
	if (err)
		ubi_ro_mode(ubi);

	leb_write_unlock(ubi, vol_id, lnum);

out:
	ubi_free_vid_buf(vidb);

	return err;
}
   1170
/**
 * ubi_eba_atomic_leb_change - change logical eraseblock atomically.
 * @ubi: UBI device description object
 * @vol: volume description object
 * @lnum: logical eraseblock number
 * @buf: data to write
 * @len: how many bytes to write
 *
 * This function changes the contents of a logical eraseblock atomically. @buf
 * has to contain new logical eraseblock data, and @len - the length of the
 * data, which has to be aligned. This function guarantees that in case of an
 * unclean reboot the old contents is preserved. Returns zero in case of
 * success and a negative error code in case of failure.
 *
 * UBI reserves one LEB for the "atomic LEB change" operation, so only one
 * LEB change may be done at a time. This is ensured by @ubi->alc_mutex.
 */
int ubi_eba_atomic_leb_change(struct ubi_device *ubi, struct ubi_volume *vol,
			      int lnum, const void *buf, int len)
{
	int err, tries, vol_id = vol->vol_id;
	struct ubi_vid_io_buf *vidb;
	struct ubi_vid_hdr *vid_hdr;
	uint32_t crc;

	if (ubi->ro_mode)
		return -EROFS;

	if (len == 0) {
		/*
		 * Special case when data length is zero. In this case the LEB
		 * has to be unmapped and mapped somewhere else.
		 */
		err = ubi_eba_unmap_leb(ubi, vol, lnum);
		if (err)
			return err;
		return ubi_eba_write_leb(ubi, vol, lnum, NULL, 0, 0);
	}

	vidb = ubi_alloc_vid_buf(ubi, GFP_NOFS);
	if (!vidb)
		return -ENOMEM;

	vid_hdr = ubi_get_vid_hdr(vidb);

	/* Only one atomic LEB change may be in flight at a time */
	mutex_lock(&ubi->alc_mutex);
	err = leb_write_lock(ubi, vol_id, lnum);
	if (err)
		goto out_mutex;

	vid_hdr->sqnum = cpu_to_be64(ubi_next_sqnum(ubi));
	vid_hdr->vol_id = cpu_to_be32(vol_id);
	vid_hdr->lnum = cpu_to_be32(lnum);
	vid_hdr->compat = ubi_get_compat(ubi, vol_id);
	vid_hdr->data_pad = cpu_to_be32(vol->data_pad);

	/*
	 * NOTE(review): @copy_flag and @data_crc presumably let the attach
	 * code pick the newer, intact copy after an unclean reboot - confirm
	 * against the attach/scan sub-system.
	 */
	crc = crc32(UBI_CRC32_INIT, buf, len);
	vid_hdr->vol_type = UBI_VID_DYNAMIC;
	vid_hdr->data_size = cpu_to_be32(len);
	vid_hdr->copy_flag = 1;
	vid_hdr->data_crc = cpu_to_be32(crc);

	dbg_eba("change LEB %d:%d", vol_id, lnum);

	for (tries = 0; tries <= UBI_IO_RETRIES; tries++) {
		err = try_write_vid_and_data(vol, lnum, vidb, buf, 0, len);
		if (err != -EIO || !ubi->bad_allowed)
			break;

		/* Each new PEB attempt needs a fresh sequence number */
		vid_hdr->sqnum = cpu_to_be64(ubi_next_sqnum(ubi));
		ubi_msg(ubi, "try another PEB");
	}

	/*
	 * This flash device does not admit of bad eraseblocks or
	 * something nasty and unexpected happened. Switch to read-only
	 * mode just in case.
	 */
	if (err)
		ubi_ro_mode(ubi);

	leb_write_unlock(ubi, vol_id, lnum);

out_mutex:
	mutex_unlock(&ubi->alc_mutex);
	ubi_free_vid_buf(vidb);
	return err;
}
   1259
   1260/**
   1261 * is_error_sane - check whether a read error is sane.
   1262 * @err: code of the error happened during reading
   1263 *
   1264 * This is a helper function for 'ubi_eba_copy_leb()' which is called when we
   1265 * cannot read data from the target PEB (an error @err happened). If the error
   1266 * code is sane, then we treat this error as non-fatal. Otherwise the error is
   1267 * fatal and UBI will be switched to R/O mode later.
   1268 *
   1269 * The idea is that we try not to switch to R/O mode if the read error is
   1270 * something which suggests there was a real read problem. E.g., %-EIO. Or a
   1271 * memory allocation failed (-%ENOMEM). Otherwise, it is safer to switch to R/O
   1272 * mode, simply because we do not know what happened at the MTD level, and we
   1273 * cannot handle this. E.g., the underlying driver may have become crazy, and
   1274 * it is safer to switch to R/O mode to preserve the data.
   1275 *
   1276 * And bear in mind, this is about reading from the target PEB, i.e. the PEB
   1277 * which we have just written.
   1278 */
   1279static int is_error_sane(int err)
   1280{
   1281	if (err == -EIO || err == -ENOMEM || err == UBI_IO_BAD_HDR ||
   1282	    err == UBI_IO_BAD_HDR_EBADMSG || err == -ETIMEDOUT)
   1283		return 0;
   1284	return 1;
   1285}
   1286
/**
 * ubi_eba_copy_leb - copy logical eraseblock.
 * @ubi: UBI device description object
 * @from: physical eraseblock number from where to copy
 * @to: physical eraseblock number where to copy
 * @vidb: data structure from where the VID header is derived
 *
 * This function copies logical eraseblock from physical eraseblock @from to
 * physical eraseblock @to. The @vid_hdr buffer may be changed by this
 * function. Returns:
 *   o %0 in case of success;
 *   o %MOVE_CANCEL_RACE, %MOVE_TARGET_WR_ERR, %MOVE_TARGET_BITFLIPS, etc;
 *   o a negative error code in case of failure.
 */
int ubi_eba_copy_leb(struct ubi_device *ubi, int from, int to,
		     struct ubi_vid_io_buf *vidb)
{
	int err, vol_id, lnum, data_size, aldata_size, idx;
	struct ubi_vid_hdr *vid_hdr = ubi_get_vid_hdr(vidb);
	struct ubi_volume *vol;
	uint32_t crc;

	/* The caller must hold @ubi->fm_eba_sem */
	ubi_assert(rwsem_is_locked(&ubi->fm_eba_sem));

	vol_id = be32_to_cpu(vid_hdr->vol_id);
	lnum = be32_to_cpu(vid_hdr->lnum);

	dbg_wl("copy LEB %d:%d, PEB %d to PEB %d", vol_id, lnum, from, to);

	/* Static volumes record the exact data size in the VID header */
	if (vid_hdr->vol_type == UBI_VID_STATIC) {
		data_size = be32_to_cpu(vid_hdr->data_size);
		aldata_size = ALIGN(data_size, ubi->min_io_size);
	} else
		data_size = aldata_size =
			    ubi->leb_size - be32_to_cpu(vid_hdr->data_pad);

	idx = vol_id2idx(ubi, vol_id);
	spin_lock(&ubi->volumes_lock);
	/*
	 * Note, we may race with volume deletion, which means that the volume
	 * this logical eraseblock belongs to might be being deleted. Since the
	 * volume deletion un-maps all the volume's logical eraseblocks, it will
	 * be locked in 'ubi_wl_put_peb()' and wait for the WL worker to finish.
	 */
	vol = ubi->volumes[idx];
	spin_unlock(&ubi->volumes_lock);
	if (!vol) {
		/* No need to do further work, cancel */
		dbg_wl("volume %d is being removed, cancel", vol_id);
		return MOVE_CANCEL_RACE;
	}

	/*
	 * We do not want anybody to write to this logical eraseblock while we
	 * are moving it, so lock it.
	 *
	 * Note, we are using non-waiting locking here, because we cannot sleep
	 * on the LEB, since it may cause deadlocks. Indeed, imagine a task is
	 * unmapping the LEB which is mapped to the PEB we are going to move
	 * (@from). This task locks the LEB and goes sleep in the
	 * 'ubi_wl_put_peb()' function on the @ubi->move_mutex. In turn, we are
	 * holding @ubi->move_mutex and go sleep on the LEB lock. So, if the
	 * LEB is already locked, we just do not move it and return
	 * %MOVE_RETRY. Note, we do not return %MOVE_CANCEL_RACE here because
	 * we do not know the reasons of the contention - it may be just a
	 * normal I/O on this LEB, so we want to re-try.
	 */
	err = leb_write_trylock(ubi, vol_id, lnum);
	if (err) {
		dbg_wl("contention on LEB %d:%d, cancel", vol_id, lnum);
		return MOVE_RETRY;
	}

	/*
	 * The LEB might have been put meanwhile, and the task which put it is
	 * probably waiting on @ubi->move_mutex. No need to continue the work,
	 * cancel it.
	 */
	if (vol->eba_tbl->entries[lnum].pnum != from) {
		dbg_wl("LEB %d:%d is no longer mapped to PEB %d, mapped to PEB %d, cancel",
		       vol_id, lnum, from, vol->eba_tbl->entries[lnum].pnum);
		err = MOVE_CANCEL_RACE;
		goto out_unlock_leb;
	}

	/*
	 * OK, now the LEB is locked and we can safely start moving it. Since
	 * this function utilizes the @ubi->peb_buf buffer which is shared
	 * with some other functions - we lock the buffer by taking the
	 * @ubi->buf_mutex.
	 */
	mutex_lock(&ubi->buf_mutex);
	dbg_wl("read %d bytes of data", aldata_size);
	err = ubi_io_read_data(ubi, ubi->peb_buf, from, 0, aldata_size);
	if (err && err != UBI_IO_BITFLIPS) {
		ubi_warn(ubi, "error %d while reading data from PEB %d",
			 err, from);
		err = MOVE_SOURCE_RD_ERR;
		goto out_unlock_buf;
	}

	/*
	 * Now we have got to calculate how much data we have to copy. In
	 * case of a static volume it is fairly easy - the VID header contains
	 * the data size. In case of a dynamic volume it is more difficult - we
	 * have to read the contents, cut 0xFF bytes from the end and copy only
	 * the first part. We must do this to avoid writing 0xFF bytes as it
	 * may have some side-effects. And not only this. It is important not
	 * to include those 0xFFs to CRC because later they may be filled
	 * by data.
	 */
	if (vid_hdr->vol_type == UBI_VID_DYNAMIC)
		aldata_size = data_size =
			ubi_calc_data_len(ubi, ubi->peb_buf, data_size);

	cond_resched();
	crc = crc32(UBI_CRC32_INIT, ubi->peb_buf, data_size);
	cond_resched();

	/*
	 * It may turn out to be that the whole @from physical eraseblock
	 * contains only 0xFF bytes. Then we have to only write the VID header
	 * and do not write any data. This also means we should not set
	 * @vid_hdr->copy_flag, @vid_hdr->data_size, and @vid_hdr->data_crc.
	 */
	if (data_size > 0) {
		vid_hdr->copy_flag = 1;
		vid_hdr->data_size = cpu_to_be32(data_size);
		vid_hdr->data_crc = cpu_to_be32(crc);
	}
	vid_hdr->sqnum = cpu_to_be64(ubi_next_sqnum(ubi));

	err = ubi_io_write_vid_hdr(ubi, to, vidb);
	if (err) {
		if (err == -EIO)
			err = MOVE_TARGET_WR_ERR;
		goto out_unlock_buf;
	}

	cond_resched();

	/* Read the VID header back and check if it was written correctly */
	err = ubi_io_read_vid_hdr(ubi, to, vidb, 1);
	if (err) {
		if (err != UBI_IO_BITFLIPS) {
			ubi_warn(ubi, "error %d while reading VID header back from PEB %d",
				 err, to);
			if (is_error_sane(err))
				err = MOVE_TARGET_RD_ERR;
		} else
			err = MOVE_TARGET_BITFLIPS;
		goto out_unlock_buf;
	}

	if (data_size > 0) {
		err = ubi_io_write_data(ubi, ubi->peb_buf, to, 0, aldata_size);
		if (err) {
			if (err == -EIO)
				err = MOVE_TARGET_WR_ERR;
			goto out_unlock_buf;
		}

		cond_resched();
	}

	/* Everything landed on @to - update the LEB -> PEB mapping */
	ubi_assert(vol->eba_tbl->entries[lnum].pnum == from);
	vol->eba_tbl->entries[lnum].pnum = to;

out_unlock_buf:
	mutex_unlock(&ubi->buf_mutex);
out_unlock_leb:
	leb_write_unlock(ubi, vol_id, lnum);
	return err;
}
   1461
   1462/**
   1463 * print_rsvd_warning - warn about not having enough reserved PEBs.
   1464 * @ubi: UBI device description object
   1465 * @ai: UBI attach info object
   1466 *
   1467 * This is a helper function for 'ubi_eba_init()' which is called when UBI
   1468 * cannot reserve enough PEBs for bad block handling. This function makes a
   1469 * decision whether we have to print a warning or not. The algorithm is as
   1470 * follows:
   1471 *   o if this is a new UBI image, then just print the warning
   1472 *   o if this is an UBI image which has already been used for some time, print
   1473 *     a warning only if we can reserve less than 10% of the expected amount of
   1474 *     the reserved PEB.
   1475 *
   1476 * The idea is that when UBI is used, PEBs become bad, and the reserved pool
   1477 * of PEBs becomes smaller, which is normal and we do not want to scare users
   1478 * with a warning every time they attach the MTD device. This was an issue
   1479 * reported by real users.
   1480 */
   1481static void print_rsvd_warning(struct ubi_device *ubi,
   1482			       struct ubi_attach_info *ai)
   1483{
   1484	/*
   1485	 * The 1 << 18 (256KiB) number is picked randomly, just a reasonably
   1486	 * large number to distinguish between newly flashed and used images.
   1487	 */
   1488	if (ai->max_sqnum > (1 << 18)) {
   1489		int min = ubi->beb_rsvd_level / 10;
   1490
   1491		if (!min)
   1492			min = 1;
   1493		if (ubi->beb_rsvd_pebs > min)
   1494			return;
   1495	}
   1496
   1497	ubi_warn(ubi, "cannot reserve enough PEBs for bad PEB handling, reserved %d, need %d",
   1498		 ubi->beb_rsvd_pebs, ubi->beb_rsvd_level);
   1499	if (ubi->corr_peb_count)
   1500		ubi_warn(ubi, "%d PEBs are corrupted and not used",
   1501			 ubi->corr_peb_count);
   1502}
   1503
   1504/**
   1505 * self_check_eba - run a self check on the EBA table constructed by fastmap.
   1506 * @ubi: UBI device description object
   1507 * @ai_fastmap: UBI attach info object created by fastmap
   1508 * @ai_scan: UBI attach info object created by scanning
   1509 *
   1510 * Returns < 0 in case of an internal error, 0 otherwise.
   1511 * If a bad EBA table entry was found it will be printed out and
   1512 * ubi_assert() triggers.
   1513 */
   1514int self_check_eba(struct ubi_device *ubi, struct ubi_attach_info *ai_fastmap,
   1515		   struct ubi_attach_info *ai_scan)
   1516{
   1517	int i, j, num_volumes, ret = 0;
   1518	int **scan_eba, **fm_eba;
   1519	struct ubi_ainf_volume *av;
   1520	struct ubi_volume *vol;
   1521	struct ubi_ainf_peb *aeb;
   1522	struct rb_node *rb;
   1523
   1524	num_volumes = ubi->vtbl_slots + UBI_INT_VOL_COUNT;
   1525
   1526	scan_eba = kmalloc_array(num_volumes, sizeof(*scan_eba), GFP_KERNEL);
   1527	if (!scan_eba)
   1528		return -ENOMEM;
   1529
   1530	fm_eba = kmalloc_array(num_volumes, sizeof(*fm_eba), GFP_KERNEL);
   1531	if (!fm_eba) {
   1532		kfree(scan_eba);
   1533		return -ENOMEM;
   1534	}
   1535
   1536	for (i = 0; i < num_volumes; i++) {
   1537		vol = ubi->volumes[i];
   1538		if (!vol)
   1539			continue;
   1540
   1541		scan_eba[i] = kmalloc_array(vol->reserved_pebs,
   1542					    sizeof(**scan_eba),
   1543					    GFP_KERNEL);
   1544		if (!scan_eba[i]) {
   1545			ret = -ENOMEM;
   1546			goto out_free;
   1547		}
   1548
   1549		fm_eba[i] = kmalloc_array(vol->reserved_pebs,
   1550					  sizeof(**fm_eba),
   1551					  GFP_KERNEL);
   1552		if (!fm_eba[i]) {
   1553			ret = -ENOMEM;
   1554			goto out_free;
   1555		}
   1556
   1557		for (j = 0; j < vol->reserved_pebs; j++)
   1558			scan_eba[i][j] = fm_eba[i][j] = UBI_LEB_UNMAPPED;
   1559
   1560		av = ubi_find_av(ai_scan, idx2vol_id(ubi, i));
   1561		if (!av)
   1562			continue;
   1563
   1564		ubi_rb_for_each_entry(rb, aeb, &av->root, u.rb)
   1565			scan_eba[i][aeb->lnum] = aeb->pnum;
   1566
   1567		av = ubi_find_av(ai_fastmap, idx2vol_id(ubi, i));
   1568		if (!av)
   1569			continue;
   1570
   1571		ubi_rb_for_each_entry(rb, aeb, &av->root, u.rb)
   1572			fm_eba[i][aeb->lnum] = aeb->pnum;
   1573
   1574		for (j = 0; j < vol->reserved_pebs; j++) {
   1575			if (scan_eba[i][j] != fm_eba[i][j]) {
   1576				if (scan_eba[i][j] == UBI_LEB_UNMAPPED ||
   1577					fm_eba[i][j] == UBI_LEB_UNMAPPED)
   1578					continue;
   1579
   1580				ubi_err(ubi, "LEB:%i:%i is PEB:%i instead of %i!",
   1581					vol->vol_id, j, fm_eba[i][j],
   1582					scan_eba[i][j]);
   1583				ubi_assert(0);
   1584			}
   1585		}
   1586	}
   1587
   1588out_free:
   1589	for (i = 0; i < num_volumes; i++) {
   1590		if (!ubi->volumes[i])
   1591			continue;
   1592
   1593		kfree(scan_eba[i]);
   1594		kfree(fm_eba[i]);
   1595	}
   1596
   1597	kfree(scan_eba);
   1598	kfree(fm_eba);
   1599	return ret;
   1600}
   1601
/**
 * ubi_eba_init - initialize the EBA sub-system using attaching information.
 * @ubi: UBI device description object
 * @ai: attaching information
 *
 * This function returns zero in case of success and a negative error code in
 * case of failure.
 */
int ubi_eba_init(struct ubi_device *ubi, struct ubi_attach_info *ai)
{
	int i, err, num_volumes;
	struct ubi_ainf_volume *av;
	struct ubi_volume *vol;
	struct ubi_ainf_peb *aeb;
	struct rb_node *rb;

	dbg_eba("initialize EBA sub-system");

	spin_lock_init(&ubi->ltree_lock);
	mutex_init(&ubi->alc_mutex);
	ubi->ltree = RB_ROOT;

	/* Continue the global sequence counter where the attach left off */
	ubi->global_sqnum = ai->max_sqnum + 1;
	num_volumes = ubi->vtbl_slots + UBI_INT_VOL_COUNT;

	/* Build a LEB -> PEB table for every existing volume */
	for (i = 0; i < num_volumes; i++) {
		struct ubi_eba_table *tbl;

		vol = ubi->volumes[i];
		if (!vol)
			continue;

		cond_resched();

		tbl = ubi_eba_create_table(vol, vol->reserved_pebs);
		if (IS_ERR(tbl)) {
			err = PTR_ERR(tbl);
			goto out_free;
		}

		ubi_eba_replace_table(vol, tbl);

		/* No attach info for this volume: leave its table unmapped */
		av = ubi_find_av(ai, idx2vol_id(ubi, i));
		if (!av)
			continue;

		ubi_rb_for_each_entry(rb, aeb, &av->root, u.rb) {
			if (aeb->lnum >= vol->reserved_pebs) {
				/*
				 * This may happen in case of an unclean reboot
				 * during re-size.
				 */
				ubi_move_aeb_to_list(av, aeb, &ai->erase);
			} else {
				struct ubi_eba_entry *entry;

				entry = &vol->eba_tbl->entries[aeb->lnum];
				entry->pnum = aeb->pnum;
			}
		}
	}

	if (ubi->avail_pebs < EBA_RESERVED_PEBS) {
		ubi_err(ubi, "no enough physical eraseblocks (%d, need %d)",
			ubi->avail_pebs, EBA_RESERVED_PEBS);
		if (ubi->corr_peb_count)
			ubi_err(ubi, "%d PEBs are corrupted and not used",
				ubi->corr_peb_count);
		err = -ENOSPC;
		goto out_free;
	}
	/* Set aside the PEB reserved for atomic LEB change */
	ubi->avail_pebs -= EBA_RESERVED_PEBS;
	ubi->rsvd_pebs += EBA_RESERVED_PEBS;

	if (ubi->bad_allowed) {
		ubi_calculate_reserved(ubi);

		if (ubi->avail_pebs < ubi->beb_rsvd_level) {
			/* Not enough free physical eraseblocks */
			ubi->beb_rsvd_pebs = ubi->avail_pebs;
			print_rsvd_warning(ubi, ai);
		} else
			ubi->beb_rsvd_pebs = ubi->beb_rsvd_level;

		ubi->avail_pebs -= ubi->beb_rsvd_pebs;
		ubi->rsvd_pebs  += ubi->beb_rsvd_pebs;
	}

	dbg_eba("EBA sub-system is initialized");
	return 0;

out_free:
	/* Roll back: drop any EBA tables created so far */
	for (i = 0; i < num_volumes; i++) {
		if (!ubi->volumes[i])
			continue;
		ubi_eba_replace_table(ubi->volumes[i], NULL);
	}
	return err;
}