cachepc-linux

Fork of AMDESE/linux with modifications for CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux

wl.c (56887B)


      1// SPDX-License-Identifier: GPL-2.0-or-later
      2/*
      3 * Copyright (c) International Business Machines Corp., 2006
      4 *
      5 * Authors: Artem Bityutskiy (Битюцкий Артём), Thomas Gleixner
      6 */
      7
      8/*
      9 * UBI wear-leveling sub-system.
     10 *
     11 * This sub-system is responsible for wear-leveling. It works in terms of
     12 * physical eraseblocks and erase counters and knows nothing about logical
     13 * eraseblocks, volumes, etc. From this sub-system's perspective all physical
     14 * eraseblocks are of two types - used and free. Used physical eraseblocks are
     15 * those that were "get" by the 'ubi_wl_get_peb()' function, and free physical
     16 * eraseblocks are those that were put by the 'ubi_wl_put_peb()' function.
     17 *
     18 * Physical eraseblocks returned by 'ubi_wl_get_peb()' have only an erase
     19 * counter header. The rest of the physical eraseblock contains only %0xFF bytes.
     20 *
     21 * When physical eraseblocks are returned to the WL sub-system by means of the
     22 * 'ubi_wl_put_peb()' function, they are scheduled for erasure. The erasure is
     23 * done asynchronously in context of the per-UBI device background thread,
     24 * which is also managed by the WL sub-system.
     25 *
     26 * The wear-leveling is ensured by means of moving the contents of used
     27 * physical eraseblocks with low erase counter to free physical eraseblocks
     28 * with high erase counter.
     29 *
     30 * If the WL sub-system fails to erase a physical eraseblock, it marks it as
     31 * bad.
     32 *
     33 * This sub-system is also responsible for scrubbing. If a bit-flip is detected
     34 * in a physical eraseblock, it has to be moved. Technically this is the same
     35 * as moving it for wear-leveling reasons.
     36 *
     37 * As was said above, for the UBI sub-system all physical eraseblocks are either
     38 * "free" or "used". Free eraseblocks are kept in the @wl->free RB-tree, while
     39 * used eraseblocks are kept in @wl->used, @wl->erroneous, or @wl->scrub
     40 * RB-trees, as well as (temporarily) in the @wl->pq queue.
     41 *
     42 * When the WL sub-system returns a physical eraseblock, the physical
     43 * eraseblock is protected from being moved for some "time". For this reason,
     44 * the physical eraseblock is not directly moved from the @wl->free tree to the
     45 * @wl->used tree. There is a protection queue in between where this
     46 * physical eraseblock is temporarily stored (@wl->pq).
     47 *
     48 * All this protection stuff is needed because:
     49 *  o we don't want to move physical eraseblocks just after we have given them
     50 *    to the user; instead, we first want to let users fill them up with data;
     51 *
     52 *  o there is a chance that the user will put the physical eraseblock very
     53 *    soon, so it makes sense not to move it for some time, but wait.
     54 *
     55 * Physical eraseblocks stay protected only for limited time. But the "time" is
     56 * measured in erase cycles in this case. This is implemented with help of the
     57 * protection queue. Eraseblocks are put to the tail of this queue when they
     58 * are returned by the 'ubi_wl_get_peb()', and eraseblocks are removed from the
     59 * head of the queue on each erase operation (for any eraseblock). So the
     60 * length of the queue defines how many (global) erase cycles PEBs are protected.
     61 *
     62 * To put it differently, each physical eraseblock has 2 main states: free and
     63 * used. The former state corresponds to the @wl->free tree. The latter state
     64 * is split up into several sub-states:
     65 * o the WL movement is allowed (@wl->used tree);
     66 * o the WL movement is disallowed (@wl->erroneous) because the PEB is
     67 *   erroneous - e.g., there was a read error;
     68 * o the WL movement is temporarily prohibited (@wl->pq queue);
     69 * o scrubbing is needed (@wl->scrub tree).
     70 *
     71 * Depending on the sub-state, wear-leveling entries of the used physical
     72 * eraseblocks may be kept in one of those structures.
     73 *
     74 * Note, in this implementation, we keep a small in-RAM object for each physical
     75 * eraseblock. This is surely not a scalable solution. But it appears to be good
     76 * enough for moderately large flashes and it is simple. In future, one may
     77 * re-work this sub-system and make it more scalable.
     78 *
     79 * At the moment this sub-system does not utilize the sequence number, which
     80 * was introduced relatively recently. But it would be wise to do this because
     81 * the sequence number of a logical eraseblock characterizes how old it is. For
     82 * example, when we move a PEB with low erase counter, and we need to pick the
     83 * target PEB, we pick a PEB with the highest EC if our PEB is "old" and we
     84 * pick a target PEB with an average EC if our PEB is not very "old". This is
     85 * room for future re-work of the WL sub-system.
     86 */
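
       /*
        * To summarize the life cycle described above:
        *
        *   @wl->free --ubi_wl_get_peb()--> @wl->pq --after about
        *   UBI_PROT_QUEUE_LEN erases--> @wl->used --ubi_wl_put_peb()-->
        *   scheduled for erasure --> @wl->free
        *
        * with @wl->scrub and @wl->erroneous acting as side branches of the
        * "used" state, as enumerated in the sub-state list above.
        */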
     87
     88#include <linux/slab.h>
     89#include <linux/crc32.h>
     90#include <linux/freezer.h>
     91#include <linux/kthread.h>
     92#include "ubi.h"
     93#include "wl.h"
     94
     95/* Number of physical eraseblocks reserved for wear-leveling purposes */
     96#define WL_RESERVED_PEBS 1
     97
     98/*
     99 * Maximum difference between two erase counters. If this threshold is
    100 * exceeded, the WL sub-system starts moving data from used physical
    101 * eraseblocks with low erase counter to free physical eraseblocks with high
    102 * erase counter.
    103 */
    104#define UBI_WL_THRESHOLD CONFIG_MTD_UBI_WL_THRESHOLD
    105
    106/*
    107 * When a physical eraseblock is moved, the WL sub-system has to pick the target
    108 * physical eraseblock to move to. The simplest way would be just to pick the
    109 * one with the highest erase counter. But in certain workloads this could lead
    110 * to unlimited wear of one or a few physical eraseblocks. Indeed, imagine a
    111 * situation when the picked physical eraseblock is constantly erased after the
    112 * data is written to it. So, we have a constant which limits the highest erase
    113 * counter of the free physical eraseblock to pick. Namely, the WL sub-system
    114 * does not pick eraseblocks with erase counter greater than the lowest erase
    115 * counter plus %WL_FREE_MAX_DIFF.
    116 */
    117#define WL_FREE_MAX_DIFF (2*UBI_WL_THRESHOLD)
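
       /*
        * For example, with the common default of CONFIG_MTD_UBI_WL_THRESHOLD
        * being 4096 (see the Kconfig of the tree at hand), WL_FREE_MAX_DIFF is
        * 8192: if the least worn free PEB has an erase counter of 1000, no
        * free PEB with an erase counter above 9192 is picked as a target.
        */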
    118
    119/*
    120 * Maximum number of consecutive background thread failures which is enough to
    121 * switch to read-only mode.
    122 */
    123#define WL_MAX_FAILURES 32
    124
    125static int self_check_ec(struct ubi_device *ubi, int pnum, int ec);
    126static int self_check_in_wl_tree(const struct ubi_device *ubi,
    127				 struct ubi_wl_entry *e, struct rb_root *root);
    128static int self_check_in_pq(const struct ubi_device *ubi,
    129			    struct ubi_wl_entry *e);
    130
    131/**
    132 * wl_tree_add - add a wear-leveling entry to a WL RB-tree.
    133 * @e: the wear-leveling entry to add
    134 * @root: the root of the tree
    135 *
    136 * Note, we use (erase counter, physical eraseblock number) pairs as keys in
    137 * the @ubi->used and @ubi->free RB-trees.
    138 */
    139static void wl_tree_add(struct ubi_wl_entry *e, struct rb_root *root)
    140{
    141	struct rb_node **p, *parent = NULL;
    142
    143	p = &root->rb_node;
    144	while (*p) {
    145		struct ubi_wl_entry *e1;
    146
    147		parent = *p;
    148		e1 = rb_entry(parent, struct ubi_wl_entry, u.rb);
    149
    150		if (e->ec < e1->ec)
    151			p = &(*p)->rb_left;
    152		else if (e->ec > e1->ec)
    153			p = &(*p)->rb_right;
    154		else {
    155			ubi_assert(e->pnum != e1->pnum);
    156			if (e->pnum < e1->pnum)
    157				p = &(*p)->rb_left;
    158			else
    159				p = &(*p)->rb_right;
    160		}
    161	}
    162
    163	rb_link_node(&e->u.rb, parent, p);
    164	rb_insert_color(&e->u.rb, root);
    165}
    166
    167/**
    168 * wl_entry_destroy - destroy a wear-leveling entry.
    169 * @ubi: UBI device description object
    170 * @e: the wear-leveling entry to destroy
    171 *
    172 * This function destroys a wear leveling entry and removes
    173 * the reference from the lookup table.
    174 */
    175static void wl_entry_destroy(struct ubi_device *ubi, struct ubi_wl_entry *e)
    176{
    177	ubi->lookuptbl[e->pnum] = NULL;
    178	kmem_cache_free(ubi_wl_entry_slab, e);
    179}
    180
    181/**
    182 * do_work - do one pending work.
    183 * @ubi: UBI device description object
    184 *
    185 * This function returns zero in case of success and a negative error code in
    186 * case of failure.
    187 */
    188static int do_work(struct ubi_device *ubi)
    189{
    190	int err;
    191	struct ubi_work *wrk;
    192
    193	cond_resched();
    194
    195	/*
    196	 * @ubi->work_sem is used to synchronize with the workers. Workers take
    197	 * it in read mode, so many of them may be doing work at a time. But
    198	 * the queue flush code has to be sure the whole queue of works is
    199	 * done, and it takes the semaphore in write mode.
    200	 */
    201	down_read(&ubi->work_sem);
    202	spin_lock(&ubi->wl_lock);
    203	if (list_empty(&ubi->works)) {
    204		spin_unlock(&ubi->wl_lock);
    205		up_read(&ubi->work_sem);
    206		return 0;
    207	}
    208
    209	wrk = list_entry(ubi->works.next, struct ubi_work, list);
    210	list_del(&wrk->list);
    211	ubi->works_count -= 1;
    212	ubi_assert(ubi->works_count >= 0);
    213	spin_unlock(&ubi->wl_lock);
    214
    215	/*
    216	 * Call the worker function. Do not touch the work structure
    217	 * after this call as it will have been freed or reused by that
    218	 * time by the worker function.
    219	 */
    220	err = wrk->func(ubi, wrk, 0);
    221	if (err)
    222		ubi_err(ubi, "work failed with error code %d", err);
    223	up_read(&ubi->work_sem);
    224
    225	return err;
    226}
    227
    228/**
    229 * in_wl_tree - check if wear-leveling entry is present in a WL RB-tree.
    230 * @e: the wear-leveling entry to check
    231 * @root: the root of the tree
    232 *
    233 * This function returns non-zero if @e is in the @root RB-tree and zero if it
    234 * is not.
    235 */
    236static int in_wl_tree(struct ubi_wl_entry *e, struct rb_root *root)
    237{
    238	struct rb_node *p;
    239
    240	p = root->rb_node;
    241	while (p) {
    242		struct ubi_wl_entry *e1;
    243
    244		e1 = rb_entry(p, struct ubi_wl_entry, u.rb);
    245
    246		if (e->pnum == e1->pnum) {
    247			ubi_assert(e == e1);
    248			return 1;
    249		}
    250
    251		if (e->ec < e1->ec)
    252			p = p->rb_left;
    253		else if (e->ec > e1->ec)
    254			p = p->rb_right;
    255		else {
    256			ubi_assert(e->pnum != e1->pnum);
    257			if (e->pnum < e1->pnum)
    258				p = p->rb_left;
    259			else
    260				p = p->rb_right;
    261		}
    262	}
    263
    264	return 0;
    265}
    266
    267/**
    268 * in_pq - check if a wear-leveling entry is present in the protection queue.
    269 * @ubi: UBI device description object
    270 * @e: the wear-leveling entry to check
    271 *
    272 * This function returns non-zero if @e is in the protection queue and zero
    273 * if it is not.
    274 */
    275static inline int in_pq(const struct ubi_device *ubi, struct ubi_wl_entry *e)
    276{
    277	struct ubi_wl_entry *p;
    278	int i;
    279
    280	for (i = 0; i < UBI_PROT_QUEUE_LEN; ++i)
    281		list_for_each_entry(p, &ubi->pq[i], u.list)
    282			if (p == e)
    283				return 1;
    284
    285	return 0;
    286}
    287
    288/**
    289 * prot_queue_add - add physical eraseblock to the protection queue.
    290 * @ubi: UBI device description object
    291 * @e: the physical eraseblock to add
    292 *
    293 * This function adds @e to the tail of the protection queue @ubi->pq, where
    294 * @e will stay for %UBI_PROT_QUEUE_LEN erase operations and will be
    295 * temporarily protected from the wear-leveling worker. Note, @wl->lock has to
    296 * be locked.
    297 */
    298static void prot_queue_add(struct ubi_device *ubi, struct ubi_wl_entry *e)
    299{
    300	int pq_tail = ubi->pq_head - 1;
    301
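       	/*
       	 * The tail bucket sits one slot behind @ubi->pq_head, wrapping
       	 * around, so an entry added here is served last: it stays protected
       	 * for roughly %UBI_PROT_QUEUE_LEN erase operations, as described at
       	 * the top of this file.
       	 */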
    302	if (pq_tail < 0)
    303		pq_tail = UBI_PROT_QUEUE_LEN - 1;
    304	ubi_assert(pq_tail >= 0 && pq_tail < UBI_PROT_QUEUE_LEN);
    305	list_add_tail(&e->u.list, &ubi->pq[pq_tail]);
    306	dbg_wl("added PEB %d EC %d to the protection queue", e->pnum, e->ec);
    307}
    308
    309/**
    310 * find_wl_entry - find wear-leveling entry closest to certain erase counter.
    311 * @ubi: UBI device description object
    312 * @root: the RB-tree where to look for
    313 * @diff: maximum possible difference from the smallest erase counter
    314 *
    315 * This function looks for a wear leveling entry with erase counter closest to
    316 * min + @diff, where min is the smallest erase counter.
    317 */
    318static struct ubi_wl_entry *find_wl_entry(struct ubi_device *ubi,
    319					  struct rb_root *root, int diff)
    320{
    321	struct rb_node *p;
    322	struct ubi_wl_entry *e;
    323	int max;
    324
    325	e = rb_entry(rb_first(root), struct ubi_wl_entry, u.rb);
    326	max = e->ec + diff;
    327
    328	p = root->rb_node;
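       	/*
       	 * Floor-style descent over the tree (keyed by erase counter): when a
       	 * node's EC is below @max, remember it and go right, otherwise go
       	 * left. The last remembered entry has the highest EC still below
       	 * @max.
       	 */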
    329	while (p) {
    330		struct ubi_wl_entry *e1;
    331
    332		e1 = rb_entry(p, struct ubi_wl_entry, u.rb);
    333		if (e1->ec >= max)
    334			p = p->rb_left;
    335		else {
    336			p = p->rb_right;
    337			e = e1;
    338		}
    339	}
    340
    341	return e;
    342}
    343
    344/**
    345 * find_mean_wl_entry - find wear-leveling entry with medium erase counter.
    346 * @ubi: UBI device description object
    347 * @root: the RB-tree where to look for
    348 *
    349 * This function looks for a wear leveling entry with medium erase counter,
    350 * but not greater than or equal to the lowest erase counter plus
    351 * %WL_FREE_MAX_DIFF/2.
    352 */
    353static struct ubi_wl_entry *find_mean_wl_entry(struct ubi_device *ubi,
    354					       struct rb_root *root)
    355{
    356	struct ubi_wl_entry *e, *first, *last;
    357
    358	first = rb_entry(rb_first(root), struct ubi_wl_entry, u.rb);
    359	last = rb_entry(rb_last(root), struct ubi_wl_entry, u.rb);
    360
    361	if (last->ec - first->ec < WL_FREE_MAX_DIFF) {
    362		e = rb_entry(root->rb_node, struct ubi_wl_entry, u.rb);
    363
    364		/* If no fastmap has been written and this WL entry can be used
    365		 * as anchor PEB, hold it back and return the second best
    366		 * WL entry such that fastmap can use the anchor PEB later. */
    367		e = may_reserve_for_fm(ubi, e, root);
    368	} else
    369		e = find_wl_entry(ubi, root, WL_FREE_MAX_DIFF/2);
    370
    371	return e;
    372}
    373
    374/**
    375 * wl_get_wle - get a mean wl entry to be used by ubi_wl_get_peb() or
    376 * refill_wl_user_pool().
    377 * @ubi: UBI device description object
    378 *
    379 * This function returns a wear leveling entry in case of success and
    380 * NULL in case of failure.
    381 */
    382static struct ubi_wl_entry *wl_get_wle(struct ubi_device *ubi)
    383{
    384	struct ubi_wl_entry *e;
    385
    386	e = find_mean_wl_entry(ubi, &ubi->free);
    387	if (!e) {
    388		ubi_err(ubi, "no free eraseblocks");
    389		return NULL;
    390	}
    391
    392	self_check_in_wl_tree(ubi, e, &ubi->free);
    393
    394	/*
    395	 * Move the physical eraseblock to the protection queue where it will
    396	 * be protected from being moved for some time.
    397	 */
    398	rb_erase(&e->u.rb, &ubi->free);
    399	ubi->free_count--;
    400	dbg_wl("PEB %d EC %d", e->pnum, e->ec);
    401
    402	return e;
    403}
    404
    405/**
    406 * prot_queue_del - remove a physical eraseblock from the protection queue.
    407 * @ubi: UBI device description object
    408 * @pnum: the physical eraseblock to remove
    409 *
    410 * This function deletes PEB @pnum from the protection queue and returns zero
    411 * in case of success and %-ENODEV if the PEB was not found.
    412 */
    413static int prot_queue_del(struct ubi_device *ubi, int pnum)
    414{
    415	struct ubi_wl_entry *e;
    416
    417	e = ubi->lookuptbl[pnum];
    418	if (!e)
    419		return -ENODEV;
    420
    421	if (self_check_in_pq(ubi, e))
    422		return -ENODEV;
    423
    424	list_del(&e->u.list);
    425	dbg_wl("deleted PEB %d from the protection queue", e->pnum);
    426	return 0;
    427}
    428
    429/**
    430 * sync_erase - synchronously erase a physical eraseblock.
    431 * @ubi: UBI device description object
    432 * @e: the physical eraseblock to erase
    433 * @torture: if the physical eraseblock has to be tortured
    434 *
    435 * This function returns zero in case of success and a negative error code in
    436 * case of failure.
    437 */
    438static int sync_erase(struct ubi_device *ubi, struct ubi_wl_entry *e,
    439		      int torture)
    440{
    441	int err;
    442	struct ubi_ec_hdr *ec_hdr;
    443	unsigned long long ec = e->ec;
    444
    445	dbg_wl("erase PEB %d, old EC %llu", e->pnum, ec);
    446
    447	err = self_check_ec(ubi, e->pnum, e->ec);
    448	if (err)
    449		return -EINVAL;
    450
    451	ec_hdr = kzalloc(ubi->ec_hdr_alsize, GFP_NOFS);
    452	if (!ec_hdr)
    453		return -ENOMEM;
    454
    455	err = ubi_io_sync_erase(ubi, e->pnum, torture);
    456	if (err < 0)
    457		goto out_free;
    458
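       	/*
       	 * On success, ubi_io_sync_erase() returns the number of erase cycles
       	 * it actually performed (more than one when torturing), so account
       	 * for all of them in the erase counter.
       	 */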
    459	ec += err;
    460	if (ec > UBI_MAX_ERASECOUNTER) {
    461		/*
    462		 * Erase counter overflow. Upgrade UBI and use 64-bit
    463		 * erase counters internally.
    464		 */
    465		ubi_err(ubi, "erase counter overflow at PEB %d, EC %llu",
    466			e->pnum, ec);
    467		err = -EINVAL;
    468		goto out_free;
    469	}
    470
    471	dbg_wl("erased PEB %d, new EC %llu", e->pnum, ec);
    472
    473	ec_hdr->ec = cpu_to_be64(ec);
    474
    475	err = ubi_io_write_ec_hdr(ubi, e->pnum, ec_hdr);
    476	if (err)
    477		goto out_free;
    478
    479	e->ec = ec;
    480	spin_lock(&ubi->wl_lock);
    481	if (e->ec > ubi->max_ec)
    482		ubi->max_ec = e->ec;
    483	spin_unlock(&ubi->wl_lock);
    484
    485out_free:
    486	kfree(ec_hdr);
    487	return err;
    488}
    489
    490/**
    491 * serve_prot_queue - check if it is time to stop protecting PEBs.
    492 * @ubi: UBI device description object
    493 *
    494 * This function is called after each erase operation and removes PEBs from the
    495 * tail of the protection queue. These PEBs have been protected for long enough
    496 * and should be moved to the used tree.
    497 */
    498static void serve_prot_queue(struct ubi_device *ubi)
    499{
    500	struct ubi_wl_entry *e, *tmp;
    501	int count;
    502
    503	/*
    504	 * There may be several protected physical eraseblocks to remove,
    505	 * process them all.
    506	 */
    507repeat:
    508	count = 0;
    509	spin_lock(&ubi->wl_lock);
    510	list_for_each_entry_safe(e, tmp, &ubi->pq[ubi->pq_head], u.list) {
    511		dbg_wl("PEB %d EC %d protection over, move to used tree",
    512			e->pnum, e->ec);
    513
    514		list_del(&e->u.list);
    515		wl_tree_add(e, &ubi->used);
    516		if (count++ > 32) {
    517			/*
    518			 * Let's be nice and avoid holding the spinlock for
    519			 * too long.
    520			 */
    521			spin_unlock(&ubi->wl_lock);
    522			cond_resched();
    523			goto repeat;
    524		}
    525	}
    526
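       	/*
       	 * One more erase operation has taken place, so advance the head of
       	 * the protection queue by one bucket, wrapping around if needed.
       	 */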
    527	ubi->pq_head += 1;
    528	if (ubi->pq_head == UBI_PROT_QUEUE_LEN)
    529		ubi->pq_head = 0;
    530	ubi_assert(ubi->pq_head >= 0 && ubi->pq_head < UBI_PROT_QUEUE_LEN);
    531	spin_unlock(&ubi->wl_lock);
    532}
    533
    534/**
    535 * __schedule_ubi_work - schedule a work.
    536 * @ubi: UBI device description object
    537 * @wrk: the work to schedule
    538 *
    539 * This function adds a work defined by @wrk to the tail of the pending works
    540 * list. Can only be used if ubi->work_sem is already held in read mode!
    541 */
    542static void __schedule_ubi_work(struct ubi_device *ubi, struct ubi_work *wrk)
    543{
    544	spin_lock(&ubi->wl_lock);
    545	list_add_tail(&wrk->list, &ubi->works);
    546	ubi_assert(ubi->works_count >= 0);
    547	ubi->works_count += 1;
    548	if (ubi->thread_enabled && !ubi_dbg_is_bgt_disabled(ubi))
    549		wake_up_process(ubi->bgt_thread);
    550	spin_unlock(&ubi->wl_lock);
    551}
    552
    553/**
    554 * schedule_ubi_work - schedule a work.
    555 * @ubi: UBI device description object
    556 * @wrk: the work to schedule
    557 *
    558 * This function adds a work defined by @wrk to the tail of the pending works
    559 * list.
    560 */
    561static void schedule_ubi_work(struct ubi_device *ubi, struct ubi_work *wrk)
    562{
    563	down_read(&ubi->work_sem);
    564	__schedule_ubi_work(ubi, wrk);
    565	up_read(&ubi->work_sem);
    566}
    567
    568static int erase_worker(struct ubi_device *ubi, struct ubi_work *wl_wrk,
    569			int shutdown);
    570
    571/**
    572 * schedule_erase - schedule an erase work.
    573 * @ubi: UBI device description object
    574 * @e: the WL entry of the physical eraseblock to erase
    575 * @vol_id: the volume ID that last used this PEB
    576 * @lnum: the last used logical eraseblock number for the PEB
    577 * @torture: if the physical eraseblock has to be tortured
    578 * @nested: denotes whether the work_sem is already held in read mode
    579 *
    580 * This function returns zero in case of success and %-ENOMEM in case of
    581 * failure.
    582 */
    583static int schedule_erase(struct ubi_device *ubi, struct ubi_wl_entry *e,
    584			  int vol_id, int lnum, int torture, bool nested)
    585{
    586	struct ubi_work *wl_wrk;
    587
    588	ubi_assert(e);
    589
    590	dbg_wl("schedule erasure of PEB %d, EC %d, torture %d",
    591	       e->pnum, e->ec, torture);
    592
    593	wl_wrk = kmalloc(sizeof(struct ubi_work), GFP_NOFS);
    594	if (!wl_wrk)
    595		return -ENOMEM;
    596
    597	wl_wrk->func = &erase_worker;
    598	wl_wrk->e = e;
    599	wl_wrk->vol_id = vol_id;
    600	wl_wrk->lnum = lnum;
    601	wl_wrk->torture = torture;
    602
    603	if (nested)
    604		__schedule_ubi_work(ubi, wl_wrk);
    605	else
    606		schedule_ubi_work(ubi, wl_wrk);
    607	return 0;
    608}
    609
    610static int __erase_worker(struct ubi_device *ubi, struct ubi_work *wl_wrk);
    611/**
    612 * do_sync_erase - run the erase worker synchronously.
    613 * @ubi: UBI device description object
    614 * @e: the WL entry of the physical eraseblock to erase
    615 * @vol_id: the volume ID that last used this PEB
    616 * @lnum: the last used logical eraseblock number for the PEB
    617 * @torture: if the physical eraseblock has to be tortured
    618 *
    619 */
    620static int do_sync_erase(struct ubi_device *ubi, struct ubi_wl_entry *e,
    621			 int vol_id, int lnum, int torture)
    622{
    623	struct ubi_work wl_wrk;
    624
    625	dbg_wl("sync erase of PEB %i", e->pnum);
    626
    627	wl_wrk.e = e;
    628	wl_wrk.vol_id = vol_id;
    629	wl_wrk.lnum = lnum;
    630	wl_wrk.torture = torture;
    631
    632	return __erase_worker(ubi, &wl_wrk);
    633}
    634
    635static int ensure_wear_leveling(struct ubi_device *ubi, int nested);
    636/**
    637 * wear_leveling_worker - wear-leveling worker function.
    638 * @ubi: UBI device description object
    639 * @wrk: the work object
    640 * @shutdown: non-zero if the worker has to free memory and exit
    641 * because the WL-subsystem is shutting down
    642 *
    643 * This function copies a more worn out physical eraseblock to a less worn out
    644 * one. Returns zero in case of success and a negative error code in case of
    645 * failure.
    646 */
    647static int wear_leveling_worker(struct ubi_device *ubi, struct ubi_work *wrk,
    648				int shutdown)
    649{
    650	int err, scrubbing = 0, torture = 0, protect = 0, erroneous = 0;
    651	int erase = 0, keep = 0, vol_id = -1, lnum = -1;
    652	struct ubi_wl_entry *e1, *e2;
    653	struct ubi_vid_io_buf *vidb;
    654	struct ubi_vid_hdr *vid_hdr;
    655	int dst_leb_clean = 0;
    656
    657	kfree(wrk);
    658	if (shutdown)
    659		return 0;
    660
    661	vidb = ubi_alloc_vid_buf(ubi, GFP_NOFS);
    662	if (!vidb)
    663		return -ENOMEM;
    664
    665	vid_hdr = ubi_get_vid_hdr(vidb);
    666
    667	down_read(&ubi->fm_eba_sem);
    668	mutex_lock(&ubi->move_mutex);
    669	spin_lock(&ubi->wl_lock);
    670	ubi_assert(!ubi->move_from && !ubi->move_to);
    671	ubi_assert(!ubi->move_to_put);
    672
    673#ifdef CONFIG_MTD_UBI_FASTMAP
    674	if (!next_peb_for_wl(ubi) ||
    675#else
    676	if (!ubi->free.rb_node ||
    677#endif
    678	    (!ubi->used.rb_node && !ubi->scrub.rb_node)) {
    679		/*
    680		 * No free physical eraseblocks? Well, they must be waiting in
    681		 * the queue to be erased. Cancel movement - it will be
    682		 * triggered again when a free physical eraseblock appears.
    683		 *
    684		 * No used physical eraseblocks? They must be temporarily
    685		 * protected from being moved. They will be moved to the
    686		 * @ubi->used tree later and the wear-leveling will be
    687		 * triggered again.
    688		 */
    689		dbg_wl("cancel WL, a list is empty: free %d, used %d",
    690		       !ubi->free.rb_node, !ubi->used.rb_node);
    691		goto out_cancel;
    692	}
    693
    694#ifdef CONFIG_MTD_UBI_FASTMAP
    695	e1 = find_anchor_wl_entry(&ubi->used);
    696	if (e1 && ubi->fm_anchor &&
    697	    (ubi->fm_anchor->ec - e1->ec >= UBI_WL_THRESHOLD)) {
    698		ubi->fm_do_produce_anchor = 1;
    699		/*
    700		 * fm_anchor is no longer considered a good anchor.
    701		 * NULL assignment also prevents multiple wear level checks
    702		 * of this PEB.
    703		 */
    704		wl_tree_add(ubi->fm_anchor, &ubi->free);
    705		ubi->fm_anchor = NULL;
    706		ubi->free_count++;
    707	}
    708
    709	if (ubi->fm_do_produce_anchor) {
    710		if (!e1)
    711			goto out_cancel;
    712		e2 = get_peb_for_wl(ubi);
    713		if (!e2)
    714			goto out_cancel;
    715
    716		self_check_in_wl_tree(ubi, e1, &ubi->used);
    717		rb_erase(&e1->u.rb, &ubi->used);
    718		dbg_wl("anchor-move PEB %d to PEB %d", e1->pnum, e2->pnum);
    719		ubi->fm_do_produce_anchor = 0;
    720	} else if (!ubi->scrub.rb_node) {
    721#else
    722	if (!ubi->scrub.rb_node) {
    723#endif
    724		/*
    725		 * Now pick the least worn-out used physical eraseblock and a
    726		 * highly worn-out free physical eraseblock. If the erase
    727		 * counters differ much enough, start wear-leveling.
    728		 */
    729		e1 = rb_entry(rb_first(&ubi->used), struct ubi_wl_entry, u.rb);
    730		e2 = get_peb_for_wl(ubi);
    731		if (!e2)
    732			goto out_cancel;
    733
    734		if (!(e2->ec - e1->ec >= UBI_WL_THRESHOLD)) {
    735			dbg_wl("no WL needed: min used EC %d, max free EC %d",
    736			       e1->ec, e2->ec);
    737
    738			/* Give the unused PEB back */
    739			wl_tree_add(e2, &ubi->free);
    740			ubi->free_count++;
    741			goto out_cancel;
    742		}
    743		self_check_in_wl_tree(ubi, e1, &ubi->used);
    744		rb_erase(&e1->u.rb, &ubi->used);
    745		dbg_wl("move PEB %d EC %d to PEB %d EC %d",
    746		       e1->pnum, e1->ec, e2->pnum, e2->ec);
    747	} else {
    748		/* Perform scrubbing */
    749		scrubbing = 1;
    750		e1 = rb_entry(rb_first(&ubi->scrub), struct ubi_wl_entry, u.rb);
    751		e2 = get_peb_for_wl(ubi);
    752		if (!e2)
    753			goto out_cancel;
    754
    755		self_check_in_wl_tree(ubi, e1, &ubi->scrub);
    756		rb_erase(&e1->u.rb, &ubi->scrub);
    757		dbg_wl("scrub PEB %d to PEB %d", e1->pnum, e2->pnum);
    758	}
    759
    760	ubi->move_from = e1;
    761	ubi->move_to = e2;
    762	spin_unlock(&ubi->wl_lock);
    763
    764	/*
    765	 * Now we are going to copy physical eraseblock @e1->pnum to @e2->pnum.
    766	 * We so far do not know which logical eraseblock our physical
    767	 * eraseblock (@e1) belongs to. We have to read the volume identifier
    768	 * header first.
    769	 *
    770	 * Note, we are protected from this PEB being unmapped and erased. The
    771	 * 'ubi_wl_put_peb()' would wait for moving to be finished if the PEB
    772	 * which is being moved was unmapped.
    773	 */
    774
    775	err = ubi_io_read_vid_hdr(ubi, e1->pnum, vidb, 0);
    776	if (err && err != UBI_IO_BITFLIPS) {
    777		dst_leb_clean = 1;
    778		if (err == UBI_IO_FF) {
    779			/*
    780			 * We are trying to move a PEB without a VID header. UBI
    781			 * always writes VID headers shortly after the PEB was
    782			 * given, so we have a situation where it has not yet
    783			 * had a chance to write it because it was preempted.
    784			 * So add this PEB to the protection queue for now;
    785			 * presumably more data will be written to it
    786			 * (including the missing VID header), and then we'll
    787			 * move it.
    788			 */
    789			dbg_wl("PEB %d has no VID header", e1->pnum);
    790			protect = 1;
    791			goto out_not_moved;
    792		} else if (err == UBI_IO_FF_BITFLIPS) {
    793			/*
    794			 * The same situation as %UBI_IO_FF, but bit-flips were
    795			 * detected. It is better to schedule this PEB for
    796			 * scrubbing.
    797			 */
    798			dbg_wl("PEB %d has no VID header but has bit-flips",
    799			       e1->pnum);
    800			scrubbing = 1;
    801			goto out_not_moved;
    802		} else if (ubi->fast_attach && err == UBI_IO_BAD_HDR_EBADMSG) {
    803			/*
    804			 * While a full scan would detect interrupted erasures
    805			 * at attach time, we can face them here when attached from
    806			 * Fastmap.
    807			 */
    808			dbg_wl("PEB %d has ECC errors, maybe from an interrupted erasure",
    809			       e1->pnum);
    810			erase = 1;
    811			goto out_not_moved;
    812		}
    813
    814		ubi_err(ubi, "error %d while reading VID header from PEB %d",
    815			err, e1->pnum);
    816		goto out_error;
    817	}
    818
    819	vol_id = be32_to_cpu(vid_hdr->vol_id);
    820	lnum = be32_to_cpu(vid_hdr->lnum);
    821
    822	err = ubi_eba_copy_leb(ubi, e1->pnum, e2->pnum, vidb);
    823	if (err) {
    824		if (err == MOVE_CANCEL_RACE) {
    825			/*
    826			 * The LEB has not been moved because the volume is
    827			 * being deleted or the PEB has been put meanwhile. We
    828			 * should prevent this PEB from being selected for
    829			 * wear-leveling movement again, so put it to the
    830			 * protection queue.
    831			 */
    832			protect = 1;
    833			dst_leb_clean = 1;
    834			goto out_not_moved;
    835		}
    836		if (err == MOVE_RETRY) {
    837			scrubbing = 1;
    838			dst_leb_clean = 1;
    839			goto out_not_moved;
    840		}
    841		if (err == MOVE_TARGET_BITFLIPS || err == MOVE_TARGET_WR_ERR ||
    842		    err == MOVE_TARGET_RD_ERR) {
    843			/*
    844			 * Target PEB had bit-flips or write error - torture it.
    845			 */
    846			torture = 1;
    847			keep = 1;
    848			goto out_not_moved;
    849		}
    850
    851		if (err == MOVE_SOURCE_RD_ERR) {
    852			/*
    853			 * An error happened while reading the source PEB. Do
    854			 * not switch to R/O mode in this case, and give the
    855			 * upper layers a possibility to recover from this,
    856			 * e.g. by unmapping corresponding LEB. Instead, just
    857			 * put this PEB to the @ubi->erroneous list to prevent
    858			 * UBI from trying to move it over and over again.
    859			 */
    860			if (ubi->erroneous_peb_count > ubi->max_erroneous) {
    861				ubi_err(ubi, "too many erroneous eraseblocks (%d)",
    862					ubi->erroneous_peb_count);
    863				goto out_error;
    864			}
    865			dst_leb_clean = 1;
    866			erroneous = 1;
    867			goto out_not_moved;
    868		}
    869
    870		if (err < 0)
    871			goto out_error;
    872
    873		ubi_assert(0);
    874	}
    875
    876	/* The PEB has been successfully moved */
    877	if (scrubbing)
    878		ubi_msg(ubi, "scrubbed PEB %d (LEB %d:%d), data moved to PEB %d",
    879			e1->pnum, vol_id, lnum, e2->pnum);
    880	ubi_free_vid_buf(vidb);
    881
    882	spin_lock(&ubi->wl_lock);
    883	if (!ubi->move_to_put) {
    884		wl_tree_add(e2, &ubi->used);
    885		e2 = NULL;
    886	}
    887	ubi->move_from = ubi->move_to = NULL;
    888	ubi->move_to_put = ubi->wl_scheduled = 0;
    889	spin_unlock(&ubi->wl_lock);
    890
    891	err = do_sync_erase(ubi, e1, vol_id, lnum, 0);
    892	if (err) {
    893		if (e2)
    894			wl_entry_destroy(ubi, e2);
    895		goto out_ro;
    896	}
    897
    898	if (e2) {
    899		/*
    900		 * Well, the target PEB was put meanwhile, schedule it for
    901		 * erasure.
    902		 */
    903		dbg_wl("PEB %d (LEB %d:%d) was put meanwhile, erase",
    904		       e2->pnum, vol_id, lnum);
    905		err = do_sync_erase(ubi, e2, vol_id, lnum, 0);
    906		if (err)
    907			goto out_ro;
    908	}
    909
    910	dbg_wl("done");
    911	mutex_unlock(&ubi->move_mutex);
    912	up_read(&ubi->fm_eba_sem);
    913	return 0;
    914
    915	/*
    916	 * For some reason the LEB was not moved; it might be an error or
    917	 * something else. @e1 was not changed, so return it back. @e2 might
    918	 * have been changed, schedule it for erasure.
    919	 */
    920out_not_moved:
    921	if (vol_id != -1)
    922		dbg_wl("cancel moving PEB %d (LEB %d:%d) to PEB %d (%d)",
    923		       e1->pnum, vol_id, lnum, e2->pnum, err);
    924	else
    925		dbg_wl("cancel moving PEB %d to PEB %d (%d)",
    926		       e1->pnum, e2->pnum, err);
    927	spin_lock(&ubi->wl_lock);
    928	if (protect)
    929		prot_queue_add(ubi, e1);
    930	else if (erroneous) {
    931		wl_tree_add(e1, &ubi->erroneous);
    932		ubi->erroneous_peb_count += 1;
    933	} else if (scrubbing)
    934		wl_tree_add(e1, &ubi->scrub);
    935	else if (keep)
    936		wl_tree_add(e1, &ubi->used);
    937	if (dst_leb_clean) {
    938		wl_tree_add(e2, &ubi->free);
    939		ubi->free_count++;
    940	}
    941
    942	ubi_assert(!ubi->move_to_put);
    943	ubi->move_from = ubi->move_to = NULL;
    944	ubi->wl_scheduled = 0;
    945	spin_unlock(&ubi->wl_lock);
    946
    947	ubi_free_vid_buf(vidb);
    948	if (dst_leb_clean) {
    949		ensure_wear_leveling(ubi, 1);
    950	} else {
    951		err = do_sync_erase(ubi, e2, vol_id, lnum, torture);
    952		if (err)
    953			goto out_ro;
    954	}
    955
    956	if (erase) {
    957		err = do_sync_erase(ubi, e1, vol_id, lnum, 1);
    958		if (err)
    959			goto out_ro;
    960	}
    961
    962	mutex_unlock(&ubi->move_mutex);
    963	up_read(&ubi->fm_eba_sem);
    964	return 0;
    965
    966out_error:
    967	if (vol_id != -1)
    968		ubi_err(ubi, "error %d while moving PEB %d (LEB %d:%d) to PEB %d",
    969			err, e1->pnum, vol_id, lnum, e2->pnum);
    970	else
    971		ubi_err(ubi, "error %d while moving PEB %d to PEB %d",
    972			err, e1->pnum, e2->pnum);
    973	spin_lock(&ubi->wl_lock);
    974	ubi->move_from = ubi->move_to = NULL;
    975	ubi->move_to_put = ubi->wl_scheduled = 0;
    976	spin_unlock(&ubi->wl_lock);
    977
    978	ubi_free_vid_buf(vidb);
    979	wl_entry_destroy(ubi, e1);
    980	wl_entry_destroy(ubi, e2);
    981
    982out_ro:
    983	ubi_ro_mode(ubi);
    984	mutex_unlock(&ubi->move_mutex);
    985	up_read(&ubi->fm_eba_sem);
    986	ubi_assert(err != 0);
    987	return err < 0 ? err : -EIO;
    988
    989out_cancel:
    990	ubi->wl_scheduled = 0;
    991	spin_unlock(&ubi->wl_lock);
    992	mutex_unlock(&ubi->move_mutex);
    993	up_read(&ubi->fm_eba_sem);
    994	ubi_free_vid_buf(vidb);
    995	return 0;
    996}
    997
    998/**
    999 * ensure_wear_leveling - schedule wear-leveling if it is needed.
   1000 * @ubi: UBI device description object
   1001 * @nested: set to non-zero if this function is called from UBI worker
   1002 *
   1003 * This function checks if it is time to start wear-leveling and schedules it
   1004 * if yes. This function returns zero in case of success and a negative error
   1005 * code in case of failure.
   1006 */
   1007static int ensure_wear_leveling(struct ubi_device *ubi, int nested)
   1008{
   1009	int err = 0;
   1010	struct ubi_work *wrk;
   1011
   1012	spin_lock(&ubi->wl_lock);
   1013	if (ubi->wl_scheduled)
   1014		/* Wear-leveling is already in the work queue */
   1015		goto out_unlock;
   1016
   1017	/*
   1018	 * If the ubi->scrub tree is not empty, scrubbing is needed, and the
   1019	 * WL worker has to be scheduled anyway.
   1020	 */
   1021	if (!ubi->scrub.rb_node) {
   1022#ifdef CONFIG_MTD_UBI_FASTMAP
   1023		if (!need_wear_leveling(ubi))
   1024			goto out_unlock;
   1025#else
   1026		struct ubi_wl_entry *e1;
   1027		struct ubi_wl_entry *e2;
   1028
   1029		if (!ubi->used.rb_node || !ubi->free.rb_node)
   1030			/* No physical eraseblocks - no deal */
   1031			goto out_unlock;
   1032
   1033		/*
   1034		 * We schedule wear-leveling only if the difference between the
   1035		 * lowest erase counter of used physical eraseblocks and a high
   1036		 * erase counter of free physical eraseblocks is greater than
   1037		 * %UBI_WL_THRESHOLD.
   1038		 */
   1039		e1 = rb_entry(rb_first(&ubi->used), struct ubi_wl_entry, u.rb);
   1040		e2 = find_wl_entry(ubi, &ubi->free, WL_FREE_MAX_DIFF);
   1041
   1042		if (!(e2->ec - e1->ec >= UBI_WL_THRESHOLD))
   1043			goto out_unlock;
   1044#endif
   1045		dbg_wl("schedule wear-leveling");
   1046	} else
   1047		dbg_wl("schedule scrubbing");
   1048
   1049	ubi->wl_scheduled = 1;
   1050	spin_unlock(&ubi->wl_lock);
   1051
   1052	wrk = kmalloc(sizeof(struct ubi_work), GFP_NOFS);
   1053	if (!wrk) {
   1054		err = -ENOMEM;
   1055		goto out_cancel;
   1056	}
   1057
   1058	wrk->func = &wear_leveling_worker;
   1059	if (nested)
   1060		__schedule_ubi_work(ubi, wrk);
   1061	else
   1062		schedule_ubi_work(ubi, wrk);
   1063	return err;
   1064
   1065out_cancel:
   1066	spin_lock(&ubi->wl_lock);
   1067	ubi->wl_scheduled = 0;
   1068out_unlock:
   1069	spin_unlock(&ubi->wl_lock);
   1070	return err;
   1071}
   1072
   1073/**
   1074 * __erase_worker - physical eraseblock erase worker function.
   1075 * @ubi: UBI device description object
   1076 * @wl_wrk: the work object
   1077 *
   1078 * This function erases a physical eraseblock and performs torture testing if
   1079 * needed. It also takes care of marking the physical eraseblock bad if
   1080 * needed. Returns zero in case of success and a negative error code in case of
   1081 * failure.
   1082 */
   1083static int __erase_worker(struct ubi_device *ubi, struct ubi_work *wl_wrk)
   1084{
   1085	struct ubi_wl_entry *e = wl_wrk->e;
   1086	int pnum = e->pnum;
   1087	int vol_id = wl_wrk->vol_id;
   1088	int lnum = wl_wrk->lnum;
   1089	int err, available_consumed = 0;
   1090
   1091	dbg_wl("erase PEB %d EC %d LEB %d:%d",
   1092	       pnum, e->ec, wl_wrk->vol_id, wl_wrk->lnum);
   1093
   1094	err = sync_erase(ubi, e, wl_wrk->torture);
   1095	if (!err) {
   1096		spin_lock(&ubi->wl_lock);
   1097
   1098		if (!ubi->fm_disabled && !ubi->fm_anchor &&
   1099		    e->pnum < UBI_FM_MAX_START) {
   1100			/*
   1101			 * Abort anchor production, if needed it will be
   1102			 * enabled again in the wear leveling started below.
   1103			 */
   1104			ubi->fm_anchor = e;
   1105			ubi->fm_do_produce_anchor = 0;
   1106		} else {
   1107			wl_tree_add(e, &ubi->free);
   1108			ubi->free_count++;
   1109		}
   1110
   1111		spin_unlock(&ubi->wl_lock);
   1112
   1113		/*
   1114		 * One more erase operation has happened, take care of
   1115		 * protected physical eraseblocks.
   1116		 */
   1117		serve_prot_queue(ubi);
   1118
   1119		/* And take care of wear-leveling */
   1120		err = ensure_wear_leveling(ubi, 1);
   1121		return err;
   1122	}
   1123
   1124	ubi_err(ubi, "failed to erase PEB %d, error %d", pnum, err);
   1125
   1126	if (err == -EINTR || err == -ENOMEM || err == -EAGAIN ||
   1127	    err == -EBUSY) {
   1128		int err1;
   1129
   1130		/* Re-schedule the PEB for erasure */
   1131		err1 = schedule_erase(ubi, e, vol_id, lnum, 0, false);
   1132		if (err1) {
   1133			wl_entry_destroy(ubi, e);
   1134			err = err1;
   1135			goto out_ro;
   1136		}
   1137		return err;
   1138	}
   1139
   1140	wl_entry_destroy(ubi, e);
   1141	if (err != -EIO)
   1142		/*
   1143		 * If this is not %-EIO, we have no idea what to do. Scheduling
   1144		 * this physical eraseblock for erasure again would cause
   1145		 * errors again and again. Well, let's switch to R/O mode.
   1146		 */
   1147		goto out_ro;
   1148
   1149	/* It is %-EIO, the PEB went bad */
   1150
   1151	if (!ubi->bad_allowed) {
   1152		ubi_err(ubi, "bad physical eraseblock %d detected", pnum);
   1153		goto out_ro;
   1154	}
   1155
   1156	spin_lock(&ubi->volumes_lock);
   1157	if (ubi->beb_rsvd_pebs == 0) {
   1158		if (ubi->avail_pebs == 0) {
   1159			spin_unlock(&ubi->volumes_lock);
   1160			ubi_err(ubi, "no reserved/available physical eraseblocks");
   1161			goto out_ro;
   1162		}
   1163		ubi->avail_pebs -= 1;
   1164		available_consumed = 1;
   1165	}
   1166	spin_unlock(&ubi->volumes_lock);
   1167
   1168	ubi_msg(ubi, "mark PEB %d as bad", pnum);
   1169	err = ubi_io_mark_bad(ubi, pnum);
   1170	if (err)
   1171		goto out_ro;
   1172
   1173	spin_lock(&ubi->volumes_lock);
   1174	if (ubi->beb_rsvd_pebs > 0) {
   1175		if (available_consumed) {
   1176			/*
   1177			 * The number of reserved PEBs increased since we last
   1178			 * checked.
   1179			 */
   1180			ubi->avail_pebs += 1;
   1181			available_consumed = 0;
   1182		}
   1183		ubi->beb_rsvd_pebs -= 1;
   1184	}
   1185	ubi->bad_peb_count += 1;
   1186	ubi->good_peb_count -= 1;
   1187	ubi_calculate_reserved(ubi);
   1188	if (available_consumed)
   1189		ubi_warn(ubi, "no PEBs in the reserved pool, used an available PEB");
   1190	else if (ubi->beb_rsvd_pebs)
   1191		ubi_msg(ubi, "%d PEBs left in the reserve",
   1192			ubi->beb_rsvd_pebs);
   1193	else
   1194		ubi_warn(ubi, "last PEB from the reserve was used");
   1195	spin_unlock(&ubi->volumes_lock);
   1196
   1197	return err;
   1198
   1199out_ro:
   1200	if (available_consumed) {
   1201		spin_lock(&ubi->volumes_lock);
   1202		ubi->avail_pebs += 1;
   1203		spin_unlock(&ubi->volumes_lock);
   1204	}
   1205	ubi_ro_mode(ubi);
   1206	return err;
   1207}
   1208
   1209static int erase_worker(struct ubi_device *ubi, struct ubi_work *wl_wrk,
   1210			  int shutdown)
   1211{
   1212	int ret;
   1213
   1214	if (shutdown) {
   1215		struct ubi_wl_entry *e = wl_wrk->e;
   1216
   1217		dbg_wl("cancel erasure of PEB %d EC %d", e->pnum, e->ec);
   1218		kfree(wl_wrk);
   1219		wl_entry_destroy(ubi, e);
   1220		return 0;
   1221	}
   1222
   1223	ret = __erase_worker(ubi, wl_wrk);
   1224	kfree(wl_wrk);
   1225	return ret;
   1226}
   1227
   1228/**
   1229 * ubi_wl_put_peb - return a PEB to the wear-leveling sub-system.
   1230 * @ubi: UBI device description object
   1231 * @vol_id: the volume ID that last used this PEB
   1232 * @lnum: the last used logical eraseblock number for the PEB
   1233 * @pnum: physical eraseblock to return
   1234 * @torture: if this physical eraseblock has to be tortured
   1235 *
   1236 * This function is called to return physical eraseblock @pnum to the pool of
   1237 * free physical eraseblocks. The @torture flag has to be set if an I/O error
   1238 * occurred to this @pnum and it has to be tested. This function returns zero
   1239 * in case of success, and a negative error code in case of failure.
   1240 */
   1241int ubi_wl_put_peb(struct ubi_device *ubi, int vol_id, int lnum,
   1242		   int pnum, int torture)
   1243{
   1244	int err;
   1245	struct ubi_wl_entry *e;
   1246
   1247	dbg_wl("PEB %d", pnum);
   1248	ubi_assert(pnum >= 0);
   1249	ubi_assert(pnum < ubi->peb_count);
   1250
   1251	down_read(&ubi->fm_protect);
   1252
   1253retry:
   1254	spin_lock(&ubi->wl_lock);
   1255	e = ubi->lookuptbl[pnum];
   1256	if (e == ubi->move_from) {
   1257		/*
   1258		 * User is putting the physical eraseblock which was selected to
   1259		 * be moved. It will be scheduled for erasure in the
   1260		 * wear-leveling worker.
   1261		 */
   1262		dbg_wl("PEB %d is being moved, wait", pnum);
   1263		spin_unlock(&ubi->wl_lock);
   1264
   1265		/* Wait for the WL worker by taking the @ubi->move_mutex */
   1266		mutex_lock(&ubi->move_mutex);
   1267		mutex_unlock(&ubi->move_mutex);
   1268		goto retry;
   1269	} else if (e == ubi->move_to) {
   1270		/*
   1271		 * User is putting the physical eraseblock which was selected
   1272		 * as the target the data is moved to. It may happen if the EBA
   1273		 * sub-system already re-mapped the LEB in 'ubi_eba_copy_leb()'
   1274		 * but the WL sub-system has not put the PEB to the "used" tree
   1275		 * yet, but it is about to do this. So we just set a flag which
   1276		 * will tell the WL worker that the PEB is not needed anymore
   1277		 * and should be scheduled for erasure.
   1278		 */
   1279		dbg_wl("PEB %d is the target of data moving", pnum);
   1280		ubi_assert(!ubi->move_to_put);
   1281		ubi->move_to_put = 1;
   1282		spin_unlock(&ubi->wl_lock);
   1283		up_read(&ubi->fm_protect);
   1284		return 0;
   1285	} else {
   1286		if (in_wl_tree(e, &ubi->used)) {
   1287			self_check_in_wl_tree(ubi, e, &ubi->used);
   1288			rb_erase(&e->u.rb, &ubi->used);
   1289		} else if (in_wl_tree(e, &ubi->scrub)) {
   1290			self_check_in_wl_tree(ubi, e, &ubi->scrub);
   1291			rb_erase(&e->u.rb, &ubi->scrub);
   1292		} else if (in_wl_tree(e, &ubi->erroneous)) {
   1293			self_check_in_wl_tree(ubi, e, &ubi->erroneous);
   1294			rb_erase(&e->u.rb, &ubi->erroneous);
   1295			ubi->erroneous_peb_count -= 1;
   1296			ubi_assert(ubi->erroneous_peb_count >= 0);
   1297			/* Erroneous PEBs should be tortured */
   1298			torture = 1;
   1299		} else {
   1300			err = prot_queue_del(ubi, e->pnum);
   1301			if (err) {
   1302				ubi_err(ubi, "PEB %d not found", pnum);
   1303				ubi_ro_mode(ubi);
   1304				spin_unlock(&ubi->wl_lock);
   1305				up_read(&ubi->fm_protect);
   1306				return err;
   1307			}
   1308		}
   1309	}
   1310	spin_unlock(&ubi->wl_lock);
   1311
   1312	err = schedule_erase(ubi, e, vol_id, lnum, torture, false);
   1313	if (err) {
   1314		spin_lock(&ubi->wl_lock);
   1315		wl_tree_add(e, &ubi->used);
   1316		spin_unlock(&ubi->wl_lock);
   1317	}
   1318
   1319	up_read(&ubi->fm_protect);
   1320	return err;
   1321}
   1322
   1323/**
   1324 * ubi_wl_scrub_peb - schedule a physical eraseblock for scrubbing.
   1325 * @ubi: UBI device description object
   1326 * @pnum: the physical eraseblock to schedule
   1327 *
   1328 * If a bit-flip in a physical eraseblock is detected, this physical eraseblock
   1329 * needs scrubbing. This function schedules a physical eraseblock for
   1330 * scrubbing which is done in background. This function returns zero in case of
   1331 * success and a negative error code in case of failure.
   1332 */
   1333int ubi_wl_scrub_peb(struct ubi_device *ubi, int pnum)
   1334{
   1335	struct ubi_wl_entry *e;
   1336
   1337	ubi_msg(ubi, "schedule PEB %d for scrubbing", pnum);
   1338
   1339retry:
   1340	spin_lock(&ubi->wl_lock);
   1341	e = ubi->lookuptbl[pnum];
   1342	if (e == ubi->move_from || in_wl_tree(e, &ubi->scrub) ||
   1343				   in_wl_tree(e, &ubi->erroneous)) {
   1344		spin_unlock(&ubi->wl_lock);
   1345		return 0;
   1346	}
   1347
   1348	if (e == ubi->move_to) {
   1349		/*
   1350		 * This physical eraseblock was used to move data to. The data
   1351		 * was moved but the PEB was not yet inserted to the proper
   1352		 * tree. We should just wait a little and let the WL worker
   1353		 * proceed.
   1354		 */
   1355		spin_unlock(&ubi->wl_lock);
   1356		dbg_wl("the PEB %d is not in proper tree, retry", pnum);
   1357		yield();
   1358		goto retry;
   1359	}
   1360
   1361	if (in_wl_tree(e, &ubi->used)) {
   1362		self_check_in_wl_tree(ubi, e, &ubi->used);
   1363		rb_erase(&e->u.rb, &ubi->used);
   1364	} else {
   1365		int err;
   1366
   1367		err = prot_queue_del(ubi, e->pnum);
   1368		if (err) {
   1369			ubi_err(ubi, "PEB %d not found", pnum);
   1370			ubi_ro_mode(ubi);
   1371			spin_unlock(&ubi->wl_lock);
   1372			return err;
   1373		}
   1374	}
   1375
   1376	wl_tree_add(e, &ubi->scrub);
   1377	spin_unlock(&ubi->wl_lock);
   1378
   1379	/*
   1380	 * Technically scrubbing is the same as wear-leveling, so it is done
   1381	 * by the WL worker.
   1382	 */
   1383	return ensure_wear_leveling(ubi, 0);
   1384}
   1385
   1386/**
   1387 * ubi_wl_flush - flush all pending works.
   1388 * @ubi: UBI device description object
   1389 * @vol_id: the volume id to flush for
   1390 * @lnum: the logical eraseblock number to flush for
   1391 *
   1392 * This function executes all pending works for a particular volume id /
   1393 * logical eraseblock number pair. If either value is set to %UBI_ALL, then it
   1394 * acts as a wildcard for all of the corresponding volume numbers or logical
   1395 * eraseblock numbers. It returns zero in case of success and a negative error
   1396 * code in case of failure.
   1397 */
   1398int ubi_wl_flush(struct ubi_device *ubi, int vol_id, int lnum)
   1399{
   1400	int err = 0;
   1401	int found = 1;
   1402
   1403	/*
   1404	 * Erase while the pending works queue is not empty, but not more than
   1405	 * the number of currently pending works.
   1406	 */
   1407	dbg_wl("flush pending work for LEB %d:%d (%d pending works)",
   1408	       vol_id, lnum, ubi->works_count);
   1409
   1410	while (found) {
   1411		struct ubi_work *wrk, *tmp;
   1412		found = 0;
   1413
   1414		down_read(&ubi->work_sem);
   1415		spin_lock(&ubi->wl_lock);
   1416		list_for_each_entry_safe(wrk, tmp, &ubi->works, list) {
   1417			if ((vol_id == UBI_ALL || wrk->vol_id == vol_id) &&
   1418			    (lnum == UBI_ALL || wrk->lnum == lnum)) {
   1419				list_del(&wrk->list);
   1420				ubi->works_count -= 1;
   1421				ubi_assert(ubi->works_count >= 0);
   1422				spin_unlock(&ubi->wl_lock);
   1423
   1424				err = wrk->func(ubi, wrk, 0);
   1425				if (err) {
   1426					up_read(&ubi->work_sem);
   1427					return err;
   1428				}
   1429
   1430				spin_lock(&ubi->wl_lock);
   1431				found = 1;
   1432				break;
   1433			}
   1434		}
   1435		spin_unlock(&ubi->wl_lock);
   1436		up_read(&ubi->work_sem);
   1437	}
   1438
   1439	/*
   1440	 * Make sure all the works which are being done in parallel are
   1441	 * finished.
   1442	 */
   1443	down_write(&ubi->work_sem);
   1444	up_write(&ubi->work_sem);
   1445
   1446	return err;
   1447}
   1448
   1449static bool scrub_possible(struct ubi_device *ubi, struct ubi_wl_entry *e)
   1450{
   1451	if (in_wl_tree(e, &ubi->scrub))
   1452		return false;
   1453	else if (in_wl_tree(e, &ubi->erroneous))
   1454		return false;
   1455	else if (ubi->move_from == e)
   1456		return false;
   1457	else if (ubi->move_to == e)
   1458		return false;
   1459
   1460	return true;
   1461}
   1462
   1463/**
   1464 * ubi_bitflip_check - Check an eraseblock for bitflips and scrub it if needed.
   1465 * @ubi: UBI device description object
   1466 * @pnum: the physical eraseblock to schedule
   1467 * @force: don't read the block, assume bitflips happened and take action.
   1468 *
   1469 * This function reads the given eraseblock and checks if bitflips occurred.
   1470 * In case of bitflips, the eraseblock is scheduled for scrubbing.
   1471 * If scrubbing is forced with @force, the eraseblock is not read,
   1472 * but scheduled for scrubbing right away.
   1473 *
   1474 * Returns:
   1475 * %-EINVAL, PEB is out of range
   1476 * %-ENOENT, PEB is no longer used by UBI
   1477 * %-EBUSY, PEB cannot be checked now or a check is currently running on it
   1478 * %-EAGAIN, bit flips happened but scrubbing is currently not possible
   1479 * %-EUCLEAN, bit flips happened and PEB is scheduled for scrubbing
   1480 * %0, no bit flips detected
   1481 */
   1482int ubi_bitflip_check(struct ubi_device *ubi, int pnum, int force)
   1483{
   1484	int err = 0;
   1485	struct ubi_wl_entry *e;
   1486
   1487	if (pnum < 0 || pnum >= ubi->peb_count) {
   1488		err = -EINVAL;
   1489		goto out;
   1490	}
   1491
   1492	/*
   1493	 * Pause all parallel work, otherwise it can happen that the
   1494	 * erase worker frees a wl entry under us.
   1495	 */
   1496	down_write(&ubi->work_sem);
   1497
   1498	/*
   1499	 * Make sure that the wl entry does not change state while
   1500	 * inspecting it.
   1501	 */
   1502	spin_lock(&ubi->wl_lock);
   1503	e = ubi->lookuptbl[pnum];
   1504	if (!e) {
   1505		spin_unlock(&ubi->wl_lock);
   1506		err = -ENOENT;
   1507		goto out_resume;
   1508	}
   1509
   1510	/*
   1511	 * Does it make sense to check this PEB?
   1512	 */
   1513	if (!scrub_possible(ubi, e)) {
   1514		spin_unlock(&ubi->wl_lock);
   1515		err = -EBUSY;
   1516		goto out_resume;
   1517	}
   1518	spin_unlock(&ubi->wl_lock);
   1519
   1520	if (!force) {
   1521		mutex_lock(&ubi->buf_mutex);
   1522		err = ubi_io_read(ubi, ubi->peb_buf, pnum, 0, ubi->peb_size);
   1523		mutex_unlock(&ubi->buf_mutex);
   1524	}
   1525
   1526	if (force || err == UBI_IO_BITFLIPS) {
   1527		/*
   1528		 * Okay, bit flip happened, let's figure out what we can do.
   1529		 */
   1530		spin_lock(&ubi->wl_lock);
   1531
   1532		/*
   1533		 * Recheck. We released wl_lock, UBI might have killed the
   1534		 * wl entry under us.
   1535		 */
   1536		e = ubi->lookuptbl[pnum];
   1537		if (!e) {
   1538			spin_unlock(&ubi->wl_lock);
   1539			err = -ENOENT;
   1540			goto out_resume;
   1541		}
   1542
   1543		/*
   1544		 * Need to re-check state
   1545		 */
   1546		if (!scrub_possible(ubi, e)) {
   1547			spin_unlock(&ubi->wl_lock);
   1548			err = -EBUSY;
   1549			goto out_resume;
   1550		}
   1551
   1552		if (in_pq(ubi, e)) {
   1553			prot_queue_del(ubi, e->pnum);
   1554			wl_tree_add(e, &ubi->scrub);
   1555			spin_unlock(&ubi->wl_lock);
   1556
   1557			err = ensure_wear_leveling(ubi, 1);
   1558		} else if (in_wl_tree(e, &ubi->used)) {
   1559			rb_erase(&e->u.rb, &ubi->used);
   1560			wl_tree_add(e, &ubi->scrub);
   1561			spin_unlock(&ubi->wl_lock);
   1562
   1563			err = ensure_wear_leveling(ubi, 1);
   1564		} else if (in_wl_tree(e, &ubi->free)) {
   1565			rb_erase(&e->u.rb, &ubi->free);
   1566			ubi->free_count--;
   1567			spin_unlock(&ubi->wl_lock);
   1568
   1569			/*
   1570			 * This PEB is empty, so we can schedule it for
   1571			 * erasure right away. No wear leveling needed.
   1572			 */
   1573			err = schedule_erase(ubi, e, UBI_UNKNOWN, UBI_UNKNOWN,
   1574					     force ? 0 : 1, true);
   1575		} else {
   1576			spin_unlock(&ubi->wl_lock);
   1577			err = -EAGAIN;
   1578		}
   1579
   1580		if (!err && !force)
   1581			err = -EUCLEAN;
   1582	} else {
   1583		err = 0;
   1584	}
   1585
   1586out_resume:
   1587	up_write(&ubi->work_sem);
   1588out:
   1589
   1590	return err;
   1591}
   1592
   1593/**
   1594 * tree_destroy - destroy an RB-tree.
   1595 * @ubi: UBI device description object
   1596 * @root: the root of the tree to destroy
   1597 */
   1598static void tree_destroy(struct ubi_device *ubi, struct rb_root *root)
   1599{
   1600	struct rb_node *rb;
   1601	struct ubi_wl_entry *e;
   1602
   1603	rb = root->rb_node;
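       	/*
       	 * Iterative post-order walk without recursion or an extra stack:
       	 * descend to a leaf, free it, and clear the parent's child pointer
       	 * so that the parent becomes a leaf on a later iteration.
       	 */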
   1604	while (rb) {
   1605		if (rb->rb_left)
   1606			rb = rb->rb_left;
   1607		else if (rb->rb_right)
   1608			rb = rb->rb_right;
   1609		else {
   1610			e = rb_entry(rb, struct ubi_wl_entry, u.rb);
   1611
   1612			rb = rb_parent(rb);
   1613			if (rb) {
   1614				if (rb->rb_left == &e->u.rb)
   1615					rb->rb_left = NULL;
   1616				else
   1617					rb->rb_right = NULL;
   1618			}
   1619
   1620			wl_entry_destroy(ubi, e);
   1621		}
   1622	}
   1623}
   1624
   1625/**
   1626 * ubi_thread - UBI background thread.
   1627 * @u: the UBI device description object pointer
   1628 */
   1629int ubi_thread(void *u)
   1630{
   1631	int failures = 0;
   1632	struct ubi_device *ubi = u;
   1633
   1634	ubi_msg(ubi, "background thread \"%s\" started, PID %d",
   1635		ubi->bgt_name, task_pid_nr(current));
   1636
   1637	set_freezable();
   1638	for (;;) {
   1639		int err;
   1640
   1641		if (kthread_should_stop())
   1642			break;
   1643
   1644		if (try_to_freeze())
   1645			continue;
   1646
   1647		spin_lock(&ubi->wl_lock);
   1648		if (list_empty(&ubi->works) || ubi->ro_mode ||
   1649		    !ubi->thread_enabled || ubi_dbg_is_bgt_disabled(ubi)) {
   1650			set_current_state(TASK_INTERRUPTIBLE);
   1651			spin_unlock(&ubi->wl_lock);
   1652
   1653			/*
   1654			 * Check kthread_should_stop() after we set the task
   1655			 * state to guarantee that we either see the stop bit
   1656			 * and exit or the task state is reset to runnable such
   1657			 * that it's not scheduled out indefinitely and detects
   1658			 * the stop bit at kthread_should_stop().
   1659			 */
   1660			if (kthread_should_stop()) {
   1661				set_current_state(TASK_RUNNING);
   1662				break;
   1663			}
   1664
   1665			schedule();
   1666			continue;
   1667		}
   1668		spin_unlock(&ubi->wl_lock);
   1669
   1670		err = do_work(ubi);
   1671		if (err) {
   1672			ubi_err(ubi, "%s: work failed with error code %d",
   1673				ubi->bgt_name, err);
   1674			if (failures++ > WL_MAX_FAILURES) {
   1675				/*
   1676				 * Too many failures, disable the thread and
   1677				 * switch to read-only mode.
   1678				 */
   1679				ubi_msg(ubi, "%s: %d consecutive failures",
   1680					ubi->bgt_name, WL_MAX_FAILURES);
   1681				ubi_ro_mode(ubi);
   1682				ubi->thread_enabled = 0;
   1683				continue;
   1684			}
   1685		} else
   1686			failures = 0;
   1687
   1688		cond_resched();
   1689	}
   1690
   1691	dbg_wl("background thread \"%s\" is killed", ubi->bgt_name);
   1692	ubi->thread_enabled = 0;
   1693	return 0;
   1694}
   1695
   1696/**
   1697 * shutdown_work - shut down all pending works.
   1698 * @ubi: UBI device description object
   1699 */
   1700static void shutdown_work(struct ubi_device *ubi)
   1701{
   1702	while (!list_empty(&ubi->works)) {
   1703		struct ubi_work *wrk;
   1704
   1705		wrk = list_entry(ubi->works.next, struct ubi_work, list);
   1706		list_del(&wrk->list);
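       		/*
       		 * Call the worker with the shutdown flag set so that it only
       		 * releases its resources instead of performing the work.
       		 */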
   1707		wrk->func(ubi, wrk, 1);
   1708		ubi->works_count -= 1;
   1709		ubi_assert(ubi->works_count >= 0);
   1710	}
   1711}
   1712
   1713/**
   1714 * erase_aeb - erase a PEB described by a UBI attach info entry
   1715 * @ubi: UBI device description object
   1716 * @aeb: UBI attach info PEB
   1717 * @sync: if true, erase synchronously; otherwise schedule the PEB for erasure
   1718 */
   1719static int erase_aeb(struct ubi_device *ubi, struct ubi_ainf_peb *aeb, bool sync)
   1720{
   1721	struct ubi_wl_entry *e;
   1722	int err;
   1723
   1724	e = kmem_cache_alloc(ubi_wl_entry_slab, GFP_KERNEL);
   1725	if (!e)
   1726		return -ENOMEM;
   1727
   1728	e->pnum = aeb->pnum;
   1729	e->ec = aeb->ec;
   1730	ubi->lookuptbl[e->pnum] = e;
   1731
   1732	if (sync) {
   1733		err = sync_erase(ubi, e, false);
   1734		if (err)
   1735			goto out_free;
   1736
   1737		wl_tree_add(e, &ubi->free);
   1738		ubi->free_count++;
   1739	} else {
   1740		err = schedule_erase(ubi, e, aeb->vol_id, aeb->lnum, 0, false);
   1741		if (err)
   1742			goto out_free;
   1743	}
   1744
   1745	return 0;
   1746
   1747out_free:
   1748	wl_entry_destroy(ubi, e);
   1749
   1750	return err;
   1751}
   1752
   1753/**
   1754 * ubi_wl_init - initialize the WL sub-system using attaching information.
   1755 * @ubi: UBI device description object
   1756 * @ai: attaching information
   1757 *
   1758 * This function returns zero in case of success, and a negative error code in
   1759 * case of failure.
   1760 */
   1761int ubi_wl_init(struct ubi_device *ubi, struct ubi_attach_info *ai)
   1762{
   1763	int err, i, reserved_pebs, found_pebs = 0;
   1764	struct rb_node *rb1, *rb2;
   1765	struct ubi_ainf_volume *av;
   1766	struct ubi_ainf_peb *aeb, *tmp;
   1767	struct ubi_wl_entry *e;
   1768
   1769	ubi->used = ubi->erroneous = ubi->free = ubi->scrub = RB_ROOT;
   1770	spin_lock_init(&ubi->wl_lock);
   1771	mutex_init(&ubi->move_mutex);
   1772	init_rwsem(&ubi->work_sem);
   1773	ubi->max_ec = ai->max_ec;
   1774	INIT_LIST_HEAD(&ubi->works);
   1775
   1776	sprintf(ubi->bgt_name, UBI_BGT_NAME_PATTERN, ubi->ubi_num);
   1777
   1778	err = -ENOMEM;
   1779	ubi->lookuptbl = kcalloc(ubi->peb_count, sizeof(void *), GFP_KERNEL);
   1780	if (!ubi->lookuptbl)
   1781		return err;
   1782
   1783	for (i = 0; i < UBI_PROT_QUEUE_LEN; i++)
   1784		INIT_LIST_HEAD(&ubi->pq[i]);
   1785	ubi->pq_head = 0;
   1786
   1787	ubi->free_count = 0;
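       	/* Schedule erasure of the PEBs which the attach code put on ai->erase */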
   1788	list_for_each_entry_safe(aeb, tmp, &ai->erase, u.list) {
   1789		cond_resched();
   1790
   1791		err = erase_aeb(ubi, aeb, false);
   1792		if (err)
   1793			goto out_free;
   1794
   1795		found_pebs++;
   1796	}
   1797
   1798	list_for_each_entry(aeb, &ai->free, u.list) {
   1799		cond_resched();
   1800
   1801		e = kmem_cache_alloc(ubi_wl_entry_slab, GFP_KERNEL);
   1802		if (!e) {
   1803			err = -ENOMEM;
   1804			goto out_free;
   1805		}
   1806
   1807		e->pnum = aeb->pnum;
   1808		e->ec = aeb->ec;
   1809		ubi_assert(e->ec >= 0);
   1810
   1811		wl_tree_add(e, &ubi->free);
   1812		ubi->free_count++;
   1813
   1814		ubi->lookuptbl[e->pnum] = e;
   1815
   1816		found_pebs++;
   1817	}
   1818
   1819	ubi_rb_for_each_entry(rb1, av, &ai->volumes, rb) {
   1820		ubi_rb_for_each_entry(rb2, aeb, &av->root, u.rb) {
   1821			cond_resched();
   1822
   1823			e = kmem_cache_alloc(ubi_wl_entry_slab, GFP_KERNEL);
   1824			if (!e) {
   1825				err = -ENOMEM;
   1826				goto out_free;
   1827			}
   1828
   1829			e->pnum = aeb->pnum;
   1830			e->ec = aeb->ec;
   1831			ubi->lookuptbl[e->pnum] = e;
   1832
   1833			if (!aeb->scrub) {
   1834				dbg_wl("add PEB %d EC %d to the used tree",
   1835				       e->pnum, e->ec);
   1836				wl_tree_add(e, &ubi->used);
   1837			} else {
   1838				dbg_wl("add PEB %d EC %d to the scrub tree",
   1839				       e->pnum, e->ec);
   1840				wl_tree_add(e, &ubi->scrub);
   1841			}
   1842
   1843			found_pebs++;
   1844		}
   1845	}
   1846
   1847	list_for_each_entry(aeb, &ai->fastmap, u.list) {
   1848		cond_resched();
   1849
   1850		e = ubi_find_fm_block(ubi, aeb->pnum);
   1851
   1852		if (e) {
   1853			ubi_assert(!ubi->lookuptbl[e->pnum]);
   1854			ubi->lookuptbl[e->pnum] = e;
   1855		} else {
   1856			bool sync = false;
   1857
   1858			/*
   1859			 * Usually old Fastmap PEBs are scheduled for erasure
   1860			 * and we don't have to care about them, but if a
   1861			 * power cut hits before they are scheduled we need to
   1862			 * take care of them here.
   1863			 */
   1864			if (ubi->lookuptbl[aeb->pnum])
   1865				continue;
   1866
   1867			/*
   1868			 * The fastmap update code might not find a free PEB to
   1869			 * write the fastmap anchor to and then reuses the
   1870			 * current fastmap anchor PEB. If that PEB gets erased
   1871			 * and a power cut happens before it is written again, we
   1872			 * must make sure that the fastmap attach code doesn't
   1873			 * find any outdated fastmap anchors; hence we erase the
   1874			 * outdated fastmap anchor PEBs synchronously here.
   1875			 */
   1876			if (aeb->vol_id == UBI_FM_SB_VOLUME_ID)
   1877				sync = true;
   1878
   1879			err = erase_aeb(ubi, aeb, sync);
   1880			if (err)
   1881				goto out_free;
   1882		}
   1883
   1884		found_pebs++;
   1885	}
   1886
   1887	dbg_wl("found %i PEBs", found_pebs);
   1888
   1889	ubi_assert(ubi->good_peb_count == found_pebs);
   1890
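       	/*
       	 * Reserve PEBs for internal use by the WL sub-system; when fastmap
       	 * is enabled, ubi_fastmap_init() bumps this number further.
       	 */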
   1891	reserved_pebs = WL_RESERVED_PEBS;
   1892	ubi_fastmap_init(ubi, &reserved_pebs);
   1893
   1894	if (ubi->avail_pebs < reserved_pebs) {
   1895		ubi_err(ubi, "not enough physical eraseblocks (%d, need %d)",
   1896			ubi->avail_pebs, reserved_pebs);
   1897		if (ubi->corr_peb_count)
   1898			ubi_err(ubi, "%d PEBs are corrupted and not used",
   1899				ubi->corr_peb_count);
   1900		err = -ENOSPC;
   1901		goto out_free;
   1902	}
   1903	ubi->avail_pebs -= reserved_pebs;
   1904	ubi->rsvd_pebs += reserved_pebs;
   1905
   1906	/* Schedule wear-leveling if needed */
   1907	err = ensure_wear_leveling(ubi, 0);
   1908	if (err)
   1909		goto out_free;
   1910
   1911#ifdef CONFIG_MTD_UBI_FASTMAP
   1912	if (!ubi->ro_mode && !ubi->fm_disabled)
   1913		ubi_ensure_anchor_pebs(ubi);
   1914#endif
   1915	return 0;
   1916
   1917out_free:
   1918	shutdown_work(ubi);
   1919	tree_destroy(ubi, &ubi->used);
   1920	tree_destroy(ubi, &ubi->free);
   1921	tree_destroy(ubi, &ubi->scrub);
   1922	kfree(ubi->lookuptbl);
   1923	return err;
   1924}
   1925
   1926/**
   1927 * protection_queue_destroy - destroy the protection queue.
   1928 * @ubi: UBI device description object
   1929 */
   1930static void protection_queue_destroy(struct ubi_device *ubi)
   1931{
   1932	int i;
   1933	struct ubi_wl_entry *e, *tmp;
   1934
   1935	for (i = 0; i < UBI_PROT_QUEUE_LEN; ++i) {
   1936		list_for_each_entry_safe(e, tmp, &ubi->pq[i], u.list) {
   1937			list_del(&e->u.list);
   1938			wl_entry_destroy(ubi, e);
   1939		}
   1940	}
   1941}
   1942
   1943/**
   1944 * ubi_wl_close - close the wear-leveling sub-system.
   1945 * @ubi: UBI device description object
   1946 */
   1947void ubi_wl_close(struct ubi_device *ubi)
   1948{
   1949	dbg_wl("close the WL sub-system");
   1950	ubi_fastmap_close(ubi);
   1951	shutdown_work(ubi);
   1952	protection_queue_destroy(ubi);
   1953	tree_destroy(ubi, &ubi->used);
   1954	tree_destroy(ubi, &ubi->erroneous);
   1955	tree_destroy(ubi, &ubi->free);
   1956	tree_destroy(ubi, &ubi->scrub);
   1957	kfree(ubi->lookuptbl);
   1958}
   1959
   1960/**
   1961 * self_check_ec - make sure that the erase counter of a PEB is correct.
   1962 * @ubi: UBI device description object
   1963 * @pnum: the physical eraseblock number to check
   1964 * @ec: the erase counter to check
   1965 *
   1966 * This function returns zero if the erase counter of physical eraseblock @pnum
   1967 * is equivalent to @ec, %1 if it is not, and a negative error code in case of
   1968 * failure.
   1969 */
   1970static int self_check_ec(struct ubi_device *ubi, int pnum, int ec)
   1971{
   1972	int err;
   1973	long long read_ec;
   1974	struct ubi_ec_hdr *ec_hdr;
   1975
   1976	if (!ubi_dbg_chk_gen(ubi))
   1977		return 0;
   1978
   1979	ec_hdr = kzalloc(ubi->ec_hdr_alsize, GFP_NOFS);
   1980	if (!ec_hdr)
   1981		return -ENOMEM;
   1982
   1983	err = ubi_io_read_ec_hdr(ubi, pnum, ec_hdr, 0);
   1984	if (err && err != UBI_IO_BITFLIPS) {
   1985		/* The header does not have to exist */
   1986		err = 0;
   1987		goto out_free;
   1988	}
   1989
   1990	read_ec = be64_to_cpu(ec_hdr->ec);
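       	/*
       	 * Only complain if the on-flash counter is more than one ahead of
       	 * the cached value; a difference of one can presumably happen when
       	 * an erase completed before the cached counter was updated.
       	 */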
   1991	if (ec != read_ec && read_ec - ec > 1) {
   1992		ubi_err(ubi, "self-check failed for PEB %d", pnum);
   1993		ubi_err(ubi, "read EC is %lld, should be %d", read_ec, ec);
   1994		dump_stack();
   1995		err = 1;
   1996	} else
   1997		err = 0;
   1998
   1999out_free:
   2000	kfree(ec_hdr);
   2001	return err;
   2002}
   2003
   2004/**
   2005 * self_check_in_wl_tree - check that wear-leveling entry is in WL RB-tree.
   2006 * @ubi: UBI device description object
   2007 * @e: the wear-leveling entry to check
   2008 * @root: the root of the tree
   2009 *
   2010 * This function returns zero if @e is in the @root RB-tree and %-EINVAL if it
   2011 * is not.
   2012 */
   2013static int self_check_in_wl_tree(const struct ubi_device *ubi,
   2014				 struct ubi_wl_entry *e, struct rb_root *root)
   2015{
   2016	if (!ubi_dbg_chk_gen(ubi))
   2017		return 0;
   2018
   2019	if (in_wl_tree(e, root))
   2020		return 0;
   2021
   2022	ubi_err(ubi, "self-check failed for PEB %d, EC %d, RB-tree %p",
   2023		e->pnum, e->ec, root);
   2024	dump_stack();
   2025	return -EINVAL;
   2026}
   2027
   2028/**
   2029 * self_check_in_pq - check if wear-leveling entry is in the protection queue.
   2031 * @ubi: UBI device description object
   2032 * @e: the wear-leveling entry to check
   2033 *
   2034 * This function returns zero if @e is in @ubi->pq and %-EINVAL if it is not.
   2035 */
   2036static int self_check_in_pq(const struct ubi_device *ubi,
   2037			    struct ubi_wl_entry *e)
   2038{
   2039	if (!ubi_dbg_chk_gen(ubi))
   2040		return 0;
   2041
   2042	if (in_pq(ubi, e))
   2043		return 0;
   2044
   2045	ubi_err(ubi, "self-check failed for PEB %d, EC %d, protection queue",
   2046		e->pnum, e->ec);
   2047	dump_stack();
   2048	return -EINVAL;
   2049}
   2050#ifndef CONFIG_MTD_UBI_FASTMAP
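       /*
        * get_peb_for_wl - pick a free PEB for wear-leveling purposes.
        *
        * Picks a free PEB whose erase counter is within WL_FREE_MAX_DIFF of the
        * smallest erase counter in the @ubi->free tree and removes it from the
        * tree. Must be called with @ubi->wl_lock held.
        */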
   2051static struct ubi_wl_entry *get_peb_for_wl(struct ubi_device *ubi)
   2052{
   2053	struct ubi_wl_entry *e;
   2054
   2055	e = find_wl_entry(ubi, &ubi->free, WL_FREE_MAX_DIFF);
   2056	self_check_in_wl_tree(ubi, e, &ubi->free);
   2057	ubi->free_count--;
   2058	ubi_assert(ubi->free_count >= 0);
   2059	rb_erase(&e->u.rb, &ubi->free);
   2060
   2061	return e;
   2062}
   2063
   2064/**
   2065 * produce_free_peb - produce a free physical eraseblock.
   2066 * @ubi: UBI device description object
   2067 *
   2068 * This function tries to make a free PEB by means of synchronous execution of
   2069 * pending works. This may be needed if, for example, the background thread is
   2070 * disabled. Must be called with @ubi->wl_lock held; returns zero in case of
   2071 * success and a negative error code in case of failure.
   2072 */
   2073static int produce_free_peb(struct ubi_device *ubi)
   2074{
   2075	int err;
   2076
   2077	while (!ubi->free.rb_node && ubi->works_count) {
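       		/*
       		 * do_work() sleeps and performs flash I/O, so wl_lock must be
       		 * dropped around it and re-taken afterwards.
       		 */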
   2078		spin_unlock(&ubi->wl_lock);
   2079
   2080		dbg_wl("do one work synchronously");
   2081		err = do_work(ubi);
   2082
   2083		spin_lock(&ubi->wl_lock);
   2084		if (err)
   2085			return err;
   2086	}
   2087
   2088	return 0;
   2089}
   2090
   2091/**
   2092 * ubi_wl_get_peb - get a physical eraseblock.
   2093 * @ubi: UBI device description object
   2094 *
   2095 * This function returns a physical eraseblock number in case of success and a
   2096 * negative error code in case of failure.
   2097 * It returns with @ubi->fm_eba_sem held in read mode!
   2098 */
   2099int ubi_wl_get_peb(struct ubi_device *ubi)
   2100{
   2101	int err;
   2102	struct ubi_wl_entry *e;
   2103
   2104retry:
   2105	down_read(&ubi->fm_eba_sem);
   2106	spin_lock(&ubi->wl_lock);
   2107	if (!ubi->free.rb_node) {
   2108		if (ubi->works_count == 0) {
   2109			ubi_err(ubi, "no free eraseblocks");
   2110			ubi_assert(list_empty(&ubi->works));
   2111			spin_unlock(&ubi->wl_lock);
   2112			return -ENOSPC;
   2113		}
   2114
   2115		err = produce_free_peb(ubi);
   2116		if (err < 0) {
   2117			spin_unlock(&ubi->wl_lock);
   2118			return err;
   2119		}
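       		/*
       		 * A pending work was executed, so a free PEB may be available
       		 * now. Drop both locks and retry from the top so they are
       		 * re-acquired in the usual fm_eba_sem -> wl_lock order.
       		 */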
   2120		spin_unlock(&ubi->wl_lock);
   2121		up_read(&ubi->fm_eba_sem);
   2122		goto retry;
   2124	}
   2125	e = wl_get_wle(ubi);
   2126	prot_queue_add(ubi, e);
   2127	spin_unlock(&ubi->wl_lock);
   2128
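       	/*
       	 * Debug self-check: everything past the aligned VID header offset of
       	 * a freshly returned PEB must still be 0xFF.
       	 */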
   2129	err = ubi_self_check_all_ff(ubi, e->pnum, ubi->vid_hdr_aloffset,
   2130				    ubi->peb_size - ubi->vid_hdr_aloffset);
   2131	if (err) {
   2132		ubi_err(ubi, "new PEB %d does not contain all 0xFF bytes", e->pnum);
   2133		return err;
   2134	}
   2135
   2136	return e->pnum;
   2137}
   2138#else
   2139#include "fastmap-wl.c"
   2140#endif