// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Copyright (c) International Business Machines Corp., 2006
 *
 * Authors: Artem Bityutskiy (Битюцкий Артём), Thomas Gleixner
 */

/*
 * UBI wear-leveling sub-system.
 *
 * This sub-system is responsible for wear-leveling. It works in terms of
 * physical eraseblocks and erase counters and knows nothing about logical
 * eraseblocks, volumes, etc. From this sub-system's perspective all physical
 * eraseblocks are of two types - used and free. Used physical eraseblocks are
 * those that were obtained by the 'ubi_wl_get_peb()' function, and free
 * physical eraseblocks are those that were put by the 'ubi_wl_put_peb()'
 * function.
 *
 * Physical eraseblocks returned by 'ubi_wl_get_peb()' contain only the erase
 * counter header. The rest of the physical eraseblock contains only %0xFF
 * bytes.
 *
 * When physical eraseblocks are returned to the WL sub-system by means of the
 * 'ubi_wl_put_peb()' function, they are scheduled for erasure. The erasure is
 * done asynchronously in context of the per-UBI device background thread,
 * which is also managed by the WL sub-system.
 *
 * The wear-leveling is ensured by means of moving the contents of used
 * physical eraseblocks with low erase counter to free physical eraseblocks
 * with high erase counter.
 *
 * If the WL sub-system fails to erase a physical eraseblock, it marks it as
 * bad.
 *
 * This sub-system is also responsible for scrubbing. If a bit-flip is detected
 * in a physical eraseblock, it has to be moved. Technically this is the same
 * as moving it for wear-leveling reasons.
 *
 * As was said, for the UBI sub-system all physical eraseblocks are either
 * "free" or "used". Free eraseblocks are kept in the @wl->free RB-tree, while
 * used eraseblocks are kept in @wl->used, @wl->erroneous, or @wl->scrub
 * RB-trees, as well as (temporarily) in the @wl->pq queue.
 *
 * When the WL sub-system returns a physical eraseblock, the physical
 * eraseblock is protected from being moved for some "time". For this reason,
 * the physical eraseblock is not directly moved from the @wl->free tree to the
 * @wl->used tree. There is a protection queue in between where this
 * physical eraseblock is temporarily stored (@wl->pq).
 *
 * All this protection stuff is needed because:
 * o we don't want to move physical eraseblocks just after we have given them
 *   to the user; instead, we first want to let users fill them up with data;
 *
 * o there is a chance that the user will put the physical eraseblock very
 *   soon, so it makes sense not to move it for some time, but wait.
 *
 * Physical eraseblocks stay protected only for a limited time. But the "time"
 * is measured in erase cycles in this case. This is implemented with help of
 * the protection queue. Eraseblocks are put to the tail of this queue when
 * they are returned by the 'ubi_wl_get_peb()', and eraseblocks are removed
 * from the head of the queue on each erase operation (for any eraseblock). So
 * the length of the queue defines how many (global) erase cycles PEBs are
 * protected.
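 *
 * For example, if %UBI_PROT_QUEUE_LEN were 10, a PEB handed out by
 * 'ubi_wl_get_peb()' would stay protected for roughly the next 10 erase
 * operations performed anywhere on the device, after which
 * 'serve_prot_queue()' would move it to the @wl->used tree.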
 *
 * To put it differently, each physical eraseblock has 2 main states: free and
 * used. The former state corresponds to the @wl->free tree. The latter state
 * is split up into several sub-states:
 * o the WL movement is allowed (@wl->used tree);
 * o the WL movement is disallowed (@wl->erroneous) because the PEB is
 *   erroneous - e.g., there was a read error;
 * o the WL movement is temporarily prohibited (@wl->pq queue);
 * o scrubbing is needed (@wl->scrub tree).
 *
 * Depending on the sub-state, wear-leveling entries of the used physical
 * eraseblocks may be kept in one of those structures.
 *
 * Note, in this implementation, we keep a small in-RAM object for each physical
 * eraseblock. This is surely not a scalable solution. But it appears to be good
 * enough for moderately large flashes and it is simple. In future, one may
 * re-work this sub-system and make it more scalable.
 *
 * At the moment this sub-system does not utilize the sequence number, which
 * was introduced relatively recently. But it would be wise to do this because
 * the sequence number of a logical eraseblock characterizes how old it is. For
 * example, when we move a PEB with low erase counter, and we need to pick the
 * target PEB, we pick a PEB with the highest EC if our PEB is "old" and we
 * pick a target PEB with an average EC if our PEB is not very "old". This
 * leaves room for future re-works of the WL sub-system.
 */

#include <linux/slab.h>
#include <linux/crc32.h>
#include <linux/freezer.h>
#include <linux/kthread.h>
#include "ubi.h"
#include "wl.h"

/* Number of physical eraseblocks reserved for wear-leveling purposes */
#define WL_RESERVED_PEBS 1

/*
 * Maximum difference between two erase counters. If this threshold is
 * exceeded, the WL sub-system starts moving data from used physical
 * eraseblocks with low erase counter to free physical eraseblocks with high
 * erase counter.
 */
#define UBI_WL_THRESHOLD CONFIG_MTD_UBI_WL_THRESHOLD
105 | |
106 | /* |
107 | * When a physical eraseblock is moved, the WL sub-system has to pick the target |
108 | * physical eraseblock to move to. The simplest way would be just to pick the |
109 | * one with the highest erase counter. But in certain workloads this could lead |
110 | * to an unlimited wear of one or few physical eraseblock. Indeed, imagine a |
111 | * situation when the picked physical eraseblock is constantly erased after the |
112 | * data is written to it. So, we have a constant which limits the highest erase |
113 | * counter of the free physical eraseblock to pick. Namely, the WL sub-system |
114 | * does not pick eraseblocks with erase counter greater than the lowest erase |
115 | * counter plus %WL_FREE_MAX_DIFF. |
116 | */ |
117 | #define WL_FREE_MAX_DIFF (2*UBI_WL_THRESHOLD) |
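
/*
 * Illustrative arithmetic, assuming a kernel configured with
 * CONFIG_MTD_UBI_WL_THRESHOLD set to 4096: WL_FREE_MAX_DIFF is then 8192, so
 * if the least-worn free PEB has an erase counter of 100, free PEBs with
 * erase counters above 8292 are not considered as wear-leveling targets.
 */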

/*
 * Maximum number of consecutive background thread failures which is enough to
 * switch to read-only mode.
 */
#define WL_MAX_FAILURES 32

static int self_check_ec(struct ubi_device *ubi, int pnum, int ec);
static int self_check_in_wl_tree(const struct ubi_device *ubi,
				 struct ubi_wl_entry *e, struct rb_root *root);
static int self_check_in_pq(const struct ubi_device *ubi,
			    struct ubi_wl_entry *e);

/**
 * wl_tree_add - add a wear-leveling entry to a WL RB-tree.
 * @e: the wear-leveling entry to add
 * @root: the root of the tree
 *
 * Note, we use (erase counter, physical eraseblock number) pairs as keys in
 * the @ubi->used and @ubi->free RB-trees.
 */
static void wl_tree_add(struct ubi_wl_entry *e, struct rb_root *root)
{
	struct rb_node **p, *parent = NULL;

	p = &root->rb_node;
	while (*p) {
		struct ubi_wl_entry *e1;

		parent = *p;
		e1 = rb_entry(parent, struct ubi_wl_entry, u.rb);

		if (e->ec < e1->ec)
			p = &(*p)->rb_left;
		else if (e->ec > e1->ec)
			p = &(*p)->rb_right;
		else {
			ubi_assert(e->pnum != e1->pnum);
			if (e->pnum < e1->pnum)
				p = &(*p)->rb_left;
			else
				p = &(*p)->rb_right;
		}
	}

	rb_link_node(&e->u.rb, parent, p);
	rb_insert_color(&e->u.rb, root);
}
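
/*
 * A small illustration of the ordering 'wl_tree_add()' maintains, with
 * entries written as (EC, pnum): inserting (2, 7), (2, 3) and (5, 1) gives an
 * in-order walk of (2, 3), (2, 7), (5, 1). Entries are sorted by erase
 * counter first, and the physical eraseblock number only breaks ties, so
 * rb_first() always yields a least-worn entry.
 */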

/**
 * wl_entry_destroy - destroy a wear-leveling entry.
 * @ubi: UBI device description object
 * @e: the wear-leveling entry to destroy
 *
 * This function destroys a wear leveling entry and removes
 * the reference from the lookup table.
 */
static void wl_entry_destroy(struct ubi_device *ubi, struct ubi_wl_entry *e)
{
	ubi->lookuptbl[e->pnum] = NULL;
	kmem_cache_free(ubi_wl_entry_slab, e);
}

/**
 * do_work - do one pending work.
 * @ubi: UBI device description object
 * @executed: whether a work was executed
 *
 * This function returns zero in case of success and a negative error code in
 * case of failure. If @executed is not NULL, it is set to %1 if a work was
 * executed, and to %0 otherwise.
 */
static int do_work(struct ubi_device *ubi, int *executed)
{
	int err;
	struct ubi_work *wrk;

	cond_resched();

	/*
	 * @ubi->work_sem is used to synchronize with the workers. Workers take
	 * it in read mode, so several of them may be doing work at a time. But
	 * the queue flush code has to be sure the whole queue of works is
	 * done, and it takes @ubi->work_sem in write mode.
	 */
	down_read(&ubi->work_sem);
	spin_lock(&ubi->wl_lock);
	if (list_empty(&ubi->works)) {
		spin_unlock(&ubi->wl_lock);
		up_read(&ubi->work_sem);
		if (executed)
			*executed = 0;
		return 0;
	}

	if (executed)
		*executed = 1;
	wrk = list_entry(ubi->works.next, struct ubi_work, list);
	list_del(&wrk->list);
	ubi->works_count -= 1;
	ubi_assert(ubi->works_count >= 0);
	spin_unlock(&ubi->wl_lock);

	/*
	 * Call the worker function. Do not touch the work structure
	 * after this call as it will have been freed or reused by that
	 * time by the worker function.
	 */
	err = wrk->func(ubi, wrk, 0);
	if (err)
		ubi_err(ubi, "work failed with error code %d", err);
	up_read(&ubi->work_sem);

	return err;
}
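
/*
 * A typical caller is the UBI background thread (see 'ubi_thread()' below),
 * which simply does 'err = do_work(ubi, NULL);' whenever the works list is
 * non-empty, passing NULL because it does not care whether an item was
 * actually executed.
 */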

/**
 * in_wl_tree - check if wear-leveling entry is present in a WL RB-tree.
 * @e: the wear-leveling entry to check
 * @root: the root of the tree
 *
 * This function returns non-zero if @e is in the @root RB-tree and zero if it
 * is not.
 */
static int in_wl_tree(struct ubi_wl_entry *e, struct rb_root *root)
{
	struct rb_node *p;

	p = root->rb_node;
	while (p) {
		struct ubi_wl_entry *e1;

		e1 = rb_entry(p, struct ubi_wl_entry, u.rb);

		if (e->pnum == e1->pnum) {
			ubi_assert(e == e1);
			return 1;
		}

		if (e->ec < e1->ec)
			p = p->rb_left;
		else if (e->ec > e1->ec)
			p = p->rb_right;
		else {
			ubi_assert(e->pnum != e1->pnum);
			if (e->pnum < e1->pnum)
				p = p->rb_left;
			else
				p = p->rb_right;
		}
	}

	return 0;
}

/**
 * in_pq - check if a wear-leveling entry is present in the protection queue.
 * @ubi: UBI device description object
 * @e: the wear-leveling entry to check
 *
 * This function returns non-zero if @e is in the protection queue and zero
 * if it is not.
 */
static inline int in_pq(const struct ubi_device *ubi, struct ubi_wl_entry *e)
{
	struct ubi_wl_entry *p;
	int i;

	for (i = 0; i < UBI_PROT_QUEUE_LEN; ++i)
		list_for_each_entry(p, &ubi->pq[i], u.list)
			if (p == e)
				return 1;

	return 0;
}

/**
 * prot_queue_add - add physical eraseblock to the protection queue.
 * @ubi: UBI device description object
 * @e: the physical eraseblock to add
 *
 * This function adds @e to the tail of the protection queue @ubi->pq, where
 * @e will stay for %UBI_PROT_QUEUE_LEN erase operations and will be
 * temporarily protected from the wear-leveling worker. Note, @ubi->wl_lock
 * has to be locked.
 */
static void prot_queue_add(struct ubi_device *ubi, struct ubi_wl_entry *e)
{
	int pq_tail = ubi->pq_head - 1;

	if (pq_tail < 0)
		pq_tail = UBI_PROT_QUEUE_LEN - 1;
	ubi_assert(pq_tail >= 0 && pq_tail < UBI_PROT_QUEUE_LEN);
	list_add_tail(&e->u.list, &ubi->pq[pq_tail]);
	dbg_wl("added PEB %d EC %d to the protection queue", e->pnum, e->ec);
}
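
/*
 * Queue geometry sketch: @ubi->pq is a ring of %UBI_PROT_QUEUE_LEN list
 * heads, and @ubi->pq_head points at the slot that 'serve_prot_queue()'
 * drains next. New entries therefore land in the slot just before the head -
 * e.g. with a head of 0 they go to slot %UBI_PROT_QUEUE_LEN - 1, the one
 * that will be drained last.
 */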

/**
 * find_wl_entry - find wear-leveling entry closest to certain erase counter.
 * @ubi: UBI device description object
 * @root: the RB-tree where to look for
 * @diff: maximum possible difference from the smallest erase counter
 * @pick_max: pick the PEB even if its erase counter is beyond 'min_ec + @diff'
 *
 * This function looks for a wear leveling entry with erase counter closest to
 * min + @diff, where min is the smallest erase counter.
 */
static struct ubi_wl_entry *find_wl_entry(struct ubi_device *ubi,
					  struct rb_root *root, int diff,
					  int pick_max)
{
	struct rb_node *p;
	struct ubi_wl_entry *e;
	int max;

	e = rb_entry(rb_first(root), struct ubi_wl_entry, u.rb);
	max = e->ec + diff;

	p = root->rb_node;
	while (p) {
		struct ubi_wl_entry *e1;

		e1 = rb_entry(p, struct ubi_wl_entry, u.rb);
		if (e1->ec >= max) {
			if (pick_max)
				e = e1;
			p = p->rb_left;
		} else {
			p = p->rb_right;
			e = e1;
		}
	}

	return e;
}
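
/*
 * For instance, given free entries with erase counters {10, 12, 20, 100} and
 * @diff = 50, the cut-off is 10 + 50 = 60: with @pick_max = 0 the function
 * returns the EC 20 entry (the highest one below the cut-off), while with
 * @pick_max set it returns the EC 100 entry instead.
 */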

/**
 * find_mean_wl_entry - find wear-leveling entry with medium erase counter.
 * @ubi: UBI device description object
 * @root: the RB-tree where to look for
 *
 * This function looks for a wear leveling entry with medium erase counter,
 * but not greater than or equal to the lowest erase counter plus
 * %WL_FREE_MAX_DIFF/2.
 */
static struct ubi_wl_entry *find_mean_wl_entry(struct ubi_device *ubi,
					       struct rb_root *root)
{
	struct ubi_wl_entry *e, *first, *last;

	first = rb_entry(rb_first(root), struct ubi_wl_entry, u.rb);
	last = rb_entry(rb_last(root), struct ubi_wl_entry, u.rb);

	if (last->ec - first->ec < WL_FREE_MAX_DIFF) {
		e = rb_entry(root->rb_node, struct ubi_wl_entry, u.rb);

		/*
		 * If no fastmap has been written and fm_anchor is not
		 * reserved and this WL entry can be used as anchor PEB
		 * hold it back and return the second best WL entry such
		 * that fastmap can use the anchor PEB later.
		 */
		e = may_reserve_for_fm(ubi, e, root);
	} else
		e = find_wl_entry(ubi, root, WL_FREE_MAX_DIFF/2, 0);

	return e;
}

/**
 * wl_get_wle - get a mean wl entry to be used by ubi_wl_get_peb() or
 * refill_wl_user_pool().
 * @ubi: UBI device description object
 *
 * This function returns a wear leveling entry in case of success and
 * NULL in case of failure.
 */
static struct ubi_wl_entry *wl_get_wle(struct ubi_device *ubi)
{
	struct ubi_wl_entry *e;

	e = find_mean_wl_entry(ubi, &ubi->free);
	if (!e) {
		ubi_err(ubi, "no free eraseblocks");
		return NULL;
	}

	self_check_in_wl_tree(ubi, e, &ubi->free);

	/*
	 * Move the physical eraseblock to the protection queue where it will
	 * be protected from being moved for some time.
	 */
	rb_erase(&e->u.rb, &ubi->free);
	ubi->free_count--;
	dbg_wl("PEB %d EC %d", e->pnum, e->ec);

	return e;
}

/**
 * prot_queue_del - remove a physical eraseblock from the protection queue.
 * @ubi: UBI device description object
 * @pnum: the physical eraseblock to remove
 *
 * This function deletes PEB @pnum from the protection queue and returns zero
 * in case of success and %-ENODEV if the PEB was not found.
 */
static int prot_queue_del(struct ubi_device *ubi, int pnum)
{
	struct ubi_wl_entry *e;

	e = ubi->lookuptbl[pnum];
	if (!e)
		return -ENODEV;

	if (self_check_in_pq(ubi, e))
		return -ENODEV;

	list_del(&e->u.list);
	dbg_wl("deleted PEB %d from the protection queue", e->pnum);
	return 0;
}

/**
 * ubi_sync_erase - synchronously erase a physical eraseblock.
 * @ubi: UBI device description object
 * @e: the physical eraseblock to erase
 * @torture: if the physical eraseblock has to be tortured
 *
 * This function returns zero in case of success and a negative error code in
 * case of failure.
 */
int ubi_sync_erase(struct ubi_device *ubi, struct ubi_wl_entry *e, int torture)
{
	int err;
	struct ubi_ec_hdr *ec_hdr;
	unsigned long long ec = e->ec;

	dbg_wl("erase PEB %d, old EC %llu", e->pnum, ec);

	err = self_check_ec(ubi, e->pnum, e->ec);
	if (err)
		return -EINVAL;

	ec_hdr = kzalloc(ubi->ec_hdr_alsize, GFP_NOFS);
	if (!ec_hdr)
		return -ENOMEM;

	err = ubi_io_sync_erase(ubi, e->pnum, torture);
	if (err < 0)
		goto out_free;

	ec += err;
	if (ec > UBI_MAX_ERASECOUNTER) {
		/*
		 * Erase counter overflow. Upgrade UBI and use 64-bit
		 * erase counters internally.
		 */
		ubi_err(ubi, "erase counter overflow at PEB %d, EC %llu",
			e->pnum, ec);
		err = -EINVAL;
		goto out_free;
	}

	dbg_wl("erased PEB %d, new EC %llu", e->pnum, ec);

	ec_hdr->ec = cpu_to_be64(ec);

	err = ubi_io_write_ec_hdr(ubi, e->pnum, ec_hdr);
	if (err)
		goto out_free;

	e->ec = ec;
	spin_lock(&ubi->wl_lock);
	if (e->ec > ubi->max_ec)
		ubi->max_ec = e->ec;
	spin_unlock(&ubi->wl_lock);

out_free:
	kfree(ec_hdr);
	return err;
}
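
/*
 * Note the 'ec += err' arithmetic above: on success 'ubi_io_sync_erase()'
 * returns the number of physical erase operations it performed (more than
 * one when torture testing is requested), so the erase counter grows by that
 * amount rather than by a fixed 1.
 */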

/**
 * serve_prot_queue - check if it is time to stop protecting PEBs.
 * @ubi: UBI device description object
 *
 * This function is called after each erase operation and removes PEBs from the
 * tail of the protection queue. These PEBs have been protected for long enough
 * and should be moved to the used tree.
 */
static void serve_prot_queue(struct ubi_device *ubi)
{
	struct ubi_wl_entry *e, *tmp;
	int count;

	/*
	 * There may be several protected physical eraseblocks to remove,
	 * process them all.
	 */
repeat:
	count = 0;
	spin_lock(&ubi->wl_lock);
	list_for_each_entry_safe(e, tmp, &ubi->pq[ubi->pq_head], u.list) {
		dbg_wl("PEB %d EC %d protection over, move to used tree",
		       e->pnum, e->ec);

		list_del(&e->u.list);
		wl_tree_add(e, &ubi->used);
		if (count++ > 32) {
			/*
			 * Let's be nice and avoid holding the spinlock for
			 * too long.
			 */
			spin_unlock(&ubi->wl_lock);
			cond_resched();
			goto repeat;
		}
	}

	ubi->pq_head += 1;
	if (ubi->pq_head == UBI_PROT_QUEUE_LEN)
		ubi->pq_head = 0;
	ubi_assert(ubi->pq_head >= 0 && ubi->pq_head < UBI_PROT_QUEUE_LEN);
	spin_unlock(&ubi->wl_lock);
}

/**
 * __schedule_ubi_work - schedule a work.
 * @ubi: UBI device description object
 * @wrk: the work to schedule
 *
 * This function adds a work defined by @wrk to the tail of the pending works
 * list. Can only be used if ubi->work_sem is already held in read mode!
 */
static void __schedule_ubi_work(struct ubi_device *ubi, struct ubi_work *wrk)
{
	spin_lock(&ubi->wl_lock);
	list_add_tail(&wrk->list, &ubi->works);
	ubi_assert(ubi->works_count >= 0);
	ubi->works_count += 1;
	if (ubi->thread_enabled && !ubi_dbg_is_bgt_disabled(ubi))
		wake_up_process(ubi->bgt_thread);
	spin_unlock(&ubi->wl_lock);
}

/**
 * schedule_ubi_work - schedule a work.
 * @ubi: UBI device description object
 * @wrk: the work to schedule
 *
 * This function adds a work defined by @wrk to the tail of the pending works
 * list.
 */
static void schedule_ubi_work(struct ubi_device *ubi, struct ubi_work *wrk)
{
	down_read(&ubi->work_sem);
	__schedule_ubi_work(ubi, wrk);
	up_read(&ubi->work_sem);
}

static int erase_worker(struct ubi_device *ubi, struct ubi_work *wl_wrk,
			int shutdown);

/**
 * schedule_erase - schedule an erase work.
 * @ubi: UBI device description object
 * @e: the WL entry of the physical eraseblock to erase
 * @vol_id: the volume ID that last used this PEB
 * @lnum: the last used logical eraseblock number for the PEB
 * @torture: if the physical eraseblock has to be tortured
 * @nested: denotes whether the work_sem is already held
 *
 * This function returns zero in case of success and %-ENOMEM in case of
 * failure.
 */
static int schedule_erase(struct ubi_device *ubi, struct ubi_wl_entry *e,
			  int vol_id, int lnum, int torture, bool nested)
{
	struct ubi_work *wl_wrk;

	ubi_assert(e);

	dbg_wl("schedule erasure of PEB %d, EC %d, torture %d",
	       e->pnum, e->ec, torture);

	wl_wrk = kmalloc(sizeof(struct ubi_work), GFP_NOFS);
	if (!wl_wrk)
		return -ENOMEM;

	wl_wrk->func = &erase_worker;
	wl_wrk->e = e;
	wl_wrk->vol_id = vol_id;
	wl_wrk->lnum = lnum;
	wl_wrk->torture = torture;

	if (nested)
		__schedule_ubi_work(ubi, wl_wrk);
	else
		schedule_ubi_work(ubi, wl_wrk);
	return 0;
}

static int __erase_worker(struct ubi_device *ubi, struct ubi_work *wl_wrk);
/**
 * do_sync_erase - run the erase worker synchronously.
 * @ubi: UBI device description object
 * @e: the WL entry of the physical eraseblock to erase
 * @vol_id: the volume ID that last used this PEB
 * @lnum: the last used logical eraseblock number for the PEB
 * @torture: if the physical eraseblock has to be tortured
 */
static int do_sync_erase(struct ubi_device *ubi, struct ubi_wl_entry *e,
			 int vol_id, int lnum, int torture)
{
	struct ubi_work wl_wrk;

	dbg_wl("sync erase of PEB %i", e->pnum);

	wl_wrk.e = e;
	wl_wrk.vol_id = vol_id;
	wl_wrk.lnum = lnum;
	wl_wrk.torture = torture;

	return __erase_worker(ubi, &wl_wrk);
}
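
/*
 * Note that 'do_sync_erase()' builds the 'struct ubi_work' on the stack and
 * calls '__erase_worker()' directly instead of going through
 * 'schedule_ubi_work()', so the erase has completed by the time it returns -
 * this is what the wear-leveling worker relies on when recycling @e1 and @e2
 * below.
 */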

static int ensure_wear_leveling(struct ubi_device *ubi, int nested);
/**
 * wear_leveling_worker - wear-leveling worker function.
 * @ubi: UBI device description object
 * @wrk: the work object
 * @shutdown: non-zero if the worker has to free memory and exit
 *	because the WL-subsystem is shutting down
 *
 * This function copies a more worn out physical eraseblock to a less worn out
 * one. Returns zero in case of success and a negative error code in case of
 * failure.
 */
static int wear_leveling_worker(struct ubi_device *ubi, struct ubi_work *wrk,
				int shutdown)
{
	int err, scrubbing = 0, torture = 0, protect = 0, erroneous = 0;
	int erase = 0, keep = 0, vol_id = -1, lnum = -1;
	struct ubi_wl_entry *e1, *e2;
	struct ubi_vid_io_buf *vidb;
	struct ubi_vid_hdr *vid_hdr;
	int dst_leb_clean = 0;

	kfree(wrk);
	if (shutdown)
		return 0;

	vidb = ubi_alloc_vid_buf(ubi, GFP_NOFS);
	if (!vidb)
		return -ENOMEM;

	vid_hdr = ubi_get_vid_hdr(vidb);

	down_read(&ubi->fm_eba_sem);
	mutex_lock(&ubi->move_mutex);
	spin_lock(&ubi->wl_lock);
	ubi_assert(!ubi->move_from && !ubi->move_to);
	ubi_assert(!ubi->move_to_put);

#ifdef CONFIG_MTD_UBI_FASTMAP
	if (!next_peb_for_wl(ubi) ||
#else
	if (!ubi->free.rb_node ||
#endif
	    (!ubi->used.rb_node && !ubi->scrub.rb_node)) {
		/*
		 * No free physical eraseblocks? Well, they must be waiting in
		 * the queue to be erased. Cancel movement - it will be
		 * triggered again when a free physical eraseblock appears.
		 *
		 * No used physical eraseblocks? They must be temporarily
		 * protected from being moved. They will be moved to the
		 * @ubi->used tree later and the wear-leveling will be
		 * triggered again.
		 */
		dbg_wl("cancel WL, a list is empty: free %d, used %d",
		       !ubi->free.rb_node, !ubi->used.rb_node);
		goto out_cancel;
	}

#ifdef CONFIG_MTD_UBI_FASTMAP
	e1 = find_anchor_wl_entry(&ubi->used);
	if (e1 && ubi->fm_anchor &&
	    (ubi->fm_anchor->ec - e1->ec >= UBI_WL_THRESHOLD)) {
		ubi->fm_do_produce_anchor = 1;
		/*
		 * fm_anchor is no longer considered a good anchor.
		 * NULL assignment also prevents multiple wear level checks
		 * of this PEB.
		 */
		wl_tree_add(ubi->fm_anchor, &ubi->free);
		ubi->fm_anchor = NULL;
		ubi->free_count++;
	}

	if (ubi->fm_do_produce_anchor) {
		if (!e1)
			goto out_cancel;
		e2 = get_peb_for_wl(ubi);
		if (!e2)
			goto out_cancel;

		self_check_in_wl_tree(ubi, e1, &ubi->used);
		rb_erase(&e1->u.rb, &ubi->used);
		dbg_wl("anchor-move PEB %d to PEB %d", e1->pnum, e2->pnum);
		ubi->fm_do_produce_anchor = 0;
	} else if (!ubi->scrub.rb_node) {
#else
	if (!ubi->scrub.rb_node) {
#endif
		/*
		 * Now pick the least worn-out used physical eraseblock and a
		 * highly worn-out free physical eraseblock. If the erase
		 * counters differ by enough, start wear-leveling.
		 */
		e1 = rb_entry(rb_first(&ubi->used), struct ubi_wl_entry, u.rb);
		e2 = get_peb_for_wl(ubi);
		if (!e2)
			goto out_cancel;

		if (!(e2->ec - e1->ec >= UBI_WL_THRESHOLD)) {
			dbg_wl("no WL needed: min used EC %d, max free EC %d",
			       e1->ec, e2->ec);

			/* Give the unused PEB back */
			wl_tree_add(e2, &ubi->free);
			ubi->free_count++;
			goto out_cancel;
		}
		self_check_in_wl_tree(ubi, e1, &ubi->used);
		rb_erase(&e1->u.rb, &ubi->used);
		dbg_wl("move PEB %d EC %d to PEB %d EC %d",
		       e1->pnum, e1->ec, e2->pnum, e2->ec);
	} else {
		/* Perform scrubbing */
		scrubbing = 1;
		e1 = rb_entry(rb_first(&ubi->scrub), struct ubi_wl_entry, u.rb);
		e2 = get_peb_for_wl(ubi);
		if (!e2)
			goto out_cancel;

		self_check_in_wl_tree(ubi, e1, &ubi->scrub);
		rb_erase(&e1->u.rb, &ubi->scrub);
		dbg_wl("scrub PEB %d to PEB %d", e1->pnum, e2->pnum);
	}

	ubi->move_from = e1;
	ubi->move_to = e2;
	spin_unlock(&ubi->wl_lock);

	/*
	 * Now we are going to copy physical eraseblock @e1->pnum to @e2->pnum.
	 * We so far do not know which logical eraseblock our physical
	 * eraseblock (@e1) belongs to. We have to read the volume identifier
	 * header first.
	 *
	 * Note, we are protected from this PEB being unmapped and erased. The
	 * 'ubi_wl_put_peb()' would wait for moving to be finished if the PEB
	 * which is being moved was unmapped.
	 */

	err = ubi_io_read_vid_hdr(ubi, e1->pnum, vidb, 0);
	if (err && err != UBI_IO_BITFLIPS) {
		dst_leb_clean = 1;
		if (err == UBI_IO_FF) {
			/*
			 * We are trying to move a PEB without a VID header.
			 * UBI always writes VID headers shortly after the PEB
			 * was given, so we have a situation when it has not
			 * yet had a chance to write it, because it was
			 * preempted. So add this PEB to the protection queue
			 * for now, because presumably more data will be
			 * written there (including the missing VID header),
			 * and then we'll move it.
			 */
			dbg_wl("PEB %d has no VID header", e1->pnum);
			protect = 1;
			goto out_not_moved;
		} else if (err == UBI_IO_FF_BITFLIPS) {
			/*
			 * The same situation as %UBI_IO_FF, but bit-flips were
			 * detected. It is better to schedule this PEB for
			 * scrubbing.
			 */
			dbg_wl("PEB %d has no VID header but has bit-flips",
			       e1->pnum);
			scrubbing = 1;
			goto out_not_moved;
		} else if (ubi->fast_attach && err == UBI_IO_BAD_HDR_EBADMSG) {
			/*
			 * While a full scan would detect interrupted erasures
			 * at attach time, we can face them here when attached
			 * from Fastmap.
			 */
			dbg_wl("PEB %d has ECC errors, maybe from an interrupted erasure",
			       e1->pnum);
			erase = 1;
			goto out_not_moved;
		}

		ubi_err(ubi, "error %d while reading VID header from PEB %d",
			err, e1->pnum);
		goto out_error;
	}

	vol_id = be32_to_cpu(vid_hdr->vol_id);
	lnum = be32_to_cpu(vid_hdr->lnum);

	err = ubi_eba_copy_leb(ubi, e1->pnum, e2->pnum, vidb);
	if (err) {
		if (err == MOVE_CANCEL_RACE) {
			/*
			 * The LEB has not been moved because the volume is
			 * being deleted or the PEB has been put meanwhile. We
			 * should prevent this PEB from being selected for
			 * wear-leveling movement again, so put it to the
			 * protection queue.
			 */
			protect = 1;
			dst_leb_clean = 1;
			goto out_not_moved;
		}
		if (err == MOVE_RETRY) {
			scrubbing = 1;
			dst_leb_clean = 1;
			goto out_not_moved;
		}
		if (err == MOVE_TARGET_BITFLIPS || err == MOVE_TARGET_WR_ERR ||
		    err == MOVE_TARGET_RD_ERR) {
			/*
			 * Target PEB had bit-flips or write error - torture it.
			 */
			torture = 1;
			keep = 1;
			goto out_not_moved;
		}

		if (err == MOVE_SOURCE_RD_ERR) {
			/*
			 * An error happened while reading the source PEB. Do
			 * not switch to R/O mode in this case, and give the
			 * upper layers a possibility to recover from this,
			 * e.g. by unmapping the corresponding LEB. Instead,
			 * just put this PEB to the @ubi->erroneous list to
			 * prevent UBI from trying to move it over and over
			 * again.
			 */
			if (ubi->erroneous_peb_count > ubi->max_erroneous) {
				ubi_err(ubi, "too many erroneous eraseblocks (%d)",
					ubi->erroneous_peb_count);
				goto out_error;
			}
			dst_leb_clean = 1;
			erroneous = 1;
			goto out_not_moved;
		}

		if (err < 0)
			goto out_error;

		ubi_assert(0);
	}

	/* The PEB has been successfully moved */
	if (scrubbing)
		ubi_msg(ubi, "scrubbed PEB %d (LEB %d:%d), data moved to PEB %d",
			e1->pnum, vol_id, lnum, e2->pnum);
	ubi_free_vid_buf(vidb);

	spin_lock(&ubi->wl_lock);
	if (!ubi->move_to_put) {
		wl_tree_add(e2, &ubi->used);
		e2 = NULL;
	}
	ubi->move_from = ubi->move_to = NULL;
	ubi->move_to_put = ubi->wl_scheduled = 0;
	spin_unlock(&ubi->wl_lock);

	err = do_sync_erase(ubi, e1, vol_id, lnum, 0);
	if (err) {
		if (e2) {
			spin_lock(&ubi->wl_lock);
			wl_entry_destroy(ubi, e2);
			spin_unlock(&ubi->wl_lock);
		}
		goto out_ro;
	}

	if (e2) {
		/*
		 * Well, the target PEB was put meanwhile, schedule it for
		 * erasure.
		 */
		dbg_wl("PEB %d (LEB %d:%d) was put meanwhile, erase",
		       e2->pnum, vol_id, lnum);
		err = do_sync_erase(ubi, e2, vol_id, lnum, 0);
		if (err)
			goto out_ro;
	}

	dbg_wl("done");
	mutex_unlock(&ubi->move_mutex);
	up_read(&ubi->fm_eba_sem);
	return 0;

	/*
	 * For some reason the LEB was not moved: it might be an error, or it
	 * might be something else. @e1 was not changed, so return it back.
	 * @e2 might have been changed, schedule it for erasure.
	 */
out_not_moved:
	if (vol_id != -1)
		dbg_wl("cancel moving PEB %d (LEB %d:%d) to PEB %d (%d)",
		       e1->pnum, vol_id, lnum, e2->pnum, err);
	else
		dbg_wl("cancel moving PEB %d to PEB %d (%d)",
		       e1->pnum, e2->pnum, err);
	spin_lock(&ubi->wl_lock);
	if (protect)
		prot_queue_add(ubi, e1);
	else if (erroneous) {
		wl_tree_add(e1, &ubi->erroneous);
		ubi->erroneous_peb_count += 1;
	} else if (scrubbing)
		wl_tree_add(e1, &ubi->scrub);
	else if (keep)
		wl_tree_add(e1, &ubi->used);
	if (dst_leb_clean) {
		wl_tree_add(e2, &ubi->free);
		ubi->free_count++;
	}

	ubi_assert(!ubi->move_to_put);
	ubi->move_from = ubi->move_to = NULL;
	ubi->wl_scheduled = 0;
	spin_unlock(&ubi->wl_lock);

	ubi_free_vid_buf(vidb);
	if (dst_leb_clean) {
		ensure_wear_leveling(ubi, 1);
	} else {
		err = do_sync_erase(ubi, e2, vol_id, lnum, torture);
		if (err)
			goto out_ro;
	}

	if (erase) {
		err = do_sync_erase(ubi, e1, vol_id, lnum, 1);
		if (err)
			goto out_ro;
	}

	mutex_unlock(&ubi->move_mutex);
	up_read(&ubi->fm_eba_sem);
	return 0;

out_error:
	if (vol_id != -1)
		ubi_err(ubi, "error %d while moving PEB %d (LEB %d:%d) to PEB %d",
			err, e1->pnum, vol_id, lnum, e2->pnum);
	else
		ubi_err(ubi, "error %d while moving PEB %d to PEB %d",
			err, e1->pnum, e2->pnum);
	spin_lock(&ubi->wl_lock);
	ubi->move_from = ubi->move_to = NULL;
	ubi->move_to_put = ubi->wl_scheduled = 0;
	wl_entry_destroy(ubi, e1);
	wl_entry_destroy(ubi, e2);
	spin_unlock(&ubi->wl_lock);

	ubi_free_vid_buf(vidb);

out_ro:
	ubi_ro_mode(ubi);
	mutex_unlock(&ubi->move_mutex);
	up_read(&ubi->fm_eba_sem);
	ubi_assert(err != 0);
	return err < 0 ? err : -EIO;

out_cancel:
	ubi->wl_scheduled = 0;
	spin_unlock(&ubi->wl_lock);
	mutex_unlock(&ubi->move_mutex);
	up_read(&ubi->fm_eba_sem);
	ubi_free_vid_buf(vidb);
	return 0;
}

/**
 * ensure_wear_leveling - schedule wear-leveling if it is needed.
 * @ubi: UBI device description object
 * @nested: set to non-zero if this function is called from UBI worker
 *
 * This function checks if it is time to start wear-leveling and schedules it
 * if yes. This function returns zero in case of success and a negative error
 * code in case of failure.
 */
static int ensure_wear_leveling(struct ubi_device *ubi, int nested)
{
	int err = 0;
	struct ubi_work *wrk;

	spin_lock(&ubi->wl_lock);
	if (ubi->wl_scheduled)
		/* Wear-leveling is already in the work queue */
		goto out_unlock;

	/*
	 * If the ubi->scrub tree is not empty, scrubbing is needed, and the
	 * WL worker has to be scheduled anyway.
	 */
	if (!ubi->scrub.rb_node) {
#ifdef CONFIG_MTD_UBI_FASTMAP
		if (!need_wear_leveling(ubi))
			goto out_unlock;
#else
		struct ubi_wl_entry *e1;
		struct ubi_wl_entry *e2;

		if (!ubi->used.rb_node || !ubi->free.rb_node)
			/* No physical eraseblocks - no deal */
			goto out_unlock;

		/*
		 * We schedule wear-leveling only if the difference between the
		 * lowest erase counter of used physical eraseblocks and a high
		 * erase counter of free physical eraseblocks is greater than
		 * %UBI_WL_THRESHOLD.
		 */
		e1 = rb_entry(rb_first(&ubi->used), struct ubi_wl_entry, u.rb);
		e2 = find_wl_entry(ubi, &ubi->free, WL_FREE_MAX_DIFF, 0);

		if (!(e2->ec - e1->ec >= UBI_WL_THRESHOLD))
			goto out_unlock;
#endif
		dbg_wl("schedule wear-leveling");
	} else
		dbg_wl("schedule scrubbing");

	ubi->wl_scheduled = 1;
	spin_unlock(&ubi->wl_lock);

	wrk = kmalloc(sizeof(struct ubi_work), GFP_NOFS);
	if (!wrk) {
		err = -ENOMEM;
		goto out_cancel;
	}

	wrk->func = &wear_leveling_worker;
	if (nested)
		__schedule_ubi_work(ubi, wrk);
	else
		schedule_ubi_work(ubi, wrk);
	return err;

out_cancel:
	spin_lock(&ubi->wl_lock);
	ubi->wl_scheduled = 0;
out_unlock:
	spin_unlock(&ubi->wl_lock);
	return err;
}
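
/*
 * In other words, in the non-fastmap case: if the least-worn used PEB has EC
 * 100 and the most-worn free PEB that 'find_wl_entry()' may return has EC
 * 100 + %UBI_WL_THRESHOLD or more, a wear-leveling work item is queued;
 * otherwise nothing is scheduled until the gap grows.
 */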

/**
 * __erase_worker - physical eraseblock erase worker function.
 * @ubi: UBI device description object
 * @wl_wrk: the work object
 *
 * This function erases a physical eraseblock and performs torture testing if
 * needed. It also takes care of marking the physical eraseblock bad if
 * needed. Returns zero in case of success and a negative error code in case
 * of failure.
 */
static int __erase_worker(struct ubi_device *ubi, struct ubi_work *wl_wrk)
{
	struct ubi_wl_entry *e = wl_wrk->e;
	int pnum = e->pnum;
	int vol_id = wl_wrk->vol_id;
	int lnum = wl_wrk->lnum;
	int err, available_consumed = 0;

	dbg_wl("erase PEB %d EC %d LEB %d:%d",
	       pnum, e->ec, wl_wrk->vol_id, wl_wrk->lnum);

	err = ubi_sync_erase(ubi, e, wl_wrk->torture);
	if (!err) {
		spin_lock(&ubi->wl_lock);

		if (!ubi->fm_disabled && !ubi->fm_anchor &&
		    e->pnum < UBI_FM_MAX_START) {
			/*
			 * Abort anchor production, if needed it will be
			 * enabled again in the wear leveling started below.
			 */
			ubi->fm_anchor = e;
			ubi->fm_do_produce_anchor = 0;
		} else {
			wl_tree_add(e, &ubi->free);
			ubi->free_count++;
		}

		spin_unlock(&ubi->wl_lock);

		/*
		 * One more erase operation has happened, take care of
		 * protected physical eraseblocks.
		 */
		serve_prot_queue(ubi);

		/* And take care of wear-leveling */
		err = ensure_wear_leveling(ubi, 1);
		return err;
	}

	ubi_err(ubi, "failed to erase PEB %d, error %d", pnum, err);

	if (err == -EINTR || err == -ENOMEM || err == -EAGAIN ||
	    err == -EBUSY) {
		int err1;

		/* Re-schedule the LEB for erasure */
		err1 = schedule_erase(ubi, e, vol_id, lnum, 0, true);
		if (err1) {
			spin_lock(&ubi->wl_lock);
			wl_entry_destroy(ubi, e);
			spin_unlock(&ubi->wl_lock);
			err = err1;
			goto out_ro;
		}
		return err;
	}

	spin_lock(&ubi->wl_lock);
	wl_entry_destroy(ubi, e);
	spin_unlock(&ubi->wl_lock);
	if (err != -EIO)
		/*
		 * If this is not %-EIO, we have no idea what to do. Scheduling
		 * this physical eraseblock for erasure again would cause
		 * errors again and again. Well, let's switch to R/O mode.
		 */
		goto out_ro;

	/* It is %-EIO, the PEB went bad */

	if (!ubi->bad_allowed) {
		ubi_err(ubi, "bad physical eraseblock %d detected", pnum);
		goto out_ro;
	}

	spin_lock(&ubi->volumes_lock);
	if (ubi->beb_rsvd_pebs == 0) {
		if (ubi->avail_pebs == 0) {
			spin_unlock(&ubi->volumes_lock);
			ubi_err(ubi, "no reserved/available physical eraseblocks");
			goto out_ro;
		}
		ubi->avail_pebs -= 1;
		available_consumed = 1;
	}
	spin_unlock(&ubi->volumes_lock);

	ubi_msg(ubi, "mark PEB %d as bad", pnum);
	err = ubi_io_mark_bad(ubi, pnum);
	if (err)
		goto out_ro;

	spin_lock(&ubi->volumes_lock);
	if (ubi->beb_rsvd_pebs > 0) {
		if (available_consumed) {
			/*
			 * The amount of reserved PEBs increased since we last
			 * checked.
			 */
			ubi->avail_pebs += 1;
			available_consumed = 0;
		}
		ubi->beb_rsvd_pebs -= 1;
	}
	ubi->bad_peb_count += 1;
	ubi->good_peb_count -= 1;
	ubi_calculate_reserved(ubi);
	if (available_consumed)
		ubi_warn(ubi, "no PEBs in the reserved pool, used an available PEB");
	else if (ubi->beb_rsvd_pebs)
		ubi_msg(ubi, "%d PEBs left in the reserve",
			ubi->beb_rsvd_pebs);
	else
		ubi_warn(ubi, "last PEB from the reserve was used");
	spin_unlock(&ubi->volumes_lock);

	return err;

out_ro:
	if (available_consumed) {
		spin_lock(&ubi->volumes_lock);
		ubi->avail_pebs += 1;
		spin_unlock(&ubi->volumes_lock);
	}
	ubi_ro_mode(ubi);
	return err;
}

static int erase_worker(struct ubi_device *ubi, struct ubi_work *wl_wrk,
			int shutdown)
{
	int ret;

	if (shutdown) {
		struct ubi_wl_entry *e = wl_wrk->e;

		dbg_wl("cancel erasure of PEB %d EC %d", e->pnum, e->ec);
		kfree(wl_wrk);
		wl_entry_destroy(ubi, e);
		return 0;
	}

	ret = __erase_worker(ubi, wl_wrk);
	kfree(wl_wrk);
	return ret;
}

/**
 * ubi_wl_put_peb - return a PEB to the wear-leveling sub-system.
 * @ubi: UBI device description object
 * @vol_id: the volume ID that last used this PEB
 * @lnum: the last used logical eraseblock number for the PEB
 * @pnum: physical eraseblock to return
 * @torture: if this physical eraseblock has to be tortured
 *
 * This function is called to return physical eraseblock @pnum to the pool of
 * free physical eraseblocks. The @torture flag has to be set if an I/O error
 * occurred to this @pnum and it has to be tested. This function returns zero
 * in case of success, and a negative error code in case of failure.
 */
int ubi_wl_put_peb(struct ubi_device *ubi, int vol_id, int lnum,
		   int pnum, int torture)
{
	int err;
	struct ubi_wl_entry *e;

	dbg_wl("PEB %d", pnum);
	ubi_assert(pnum >= 0);
	ubi_assert(pnum < ubi->peb_count);

	down_read(&ubi->fm_protect);

retry:
	spin_lock(&ubi->wl_lock);
	e = ubi->lookuptbl[pnum];
	if (!e) {
		/*
		 * This wl entry has been removed due to some error by another
		 * process (e.g. the wear-leveling worker). That process
		 * (except __erase_worker, which cannot run concurrently with
		 * ubi_wl_put_peb) will have switched UBI to read-only mode at
		 * the same time, so just ignore this wl entry.
		 */
		spin_unlock(&ubi->wl_lock);
		up_read(&ubi->fm_protect);
		return 0;
	}
	if (e == ubi->move_from) {
		/*
		 * User is putting the physical eraseblock which was selected to
		 * be moved. It will be scheduled for erasure in the
		 * wear-leveling worker.
		 */
		dbg_wl("PEB %d is being moved, wait", pnum);
		spin_unlock(&ubi->wl_lock);

		/* Wait for the WL worker by taking the @ubi->move_mutex */
		mutex_lock(&ubi->move_mutex);
		mutex_unlock(&ubi->move_mutex);
		goto retry;
	} else if (e == ubi->move_to) {
		/*
		 * User is putting the physical eraseblock which was selected
		 * as the target the data is moved to. It may happen if the EBA
		 * sub-system already re-mapped the LEB in 'ubi_eba_copy_leb()'
		 * but the WL sub-system has not put the PEB to the "used" tree
		 * yet, but it is about to do this. So we just set a flag which
		 * will tell the WL worker that the PEB is not needed anymore
		 * and should be scheduled for erasure.
		 */
		dbg_wl("PEB %d is the target of data moving", pnum);
		ubi_assert(!ubi->move_to_put);
		ubi->move_to_put = 1;
		spin_unlock(&ubi->wl_lock);
		up_read(&ubi->fm_protect);
		return 0;
	} else {
		if (in_wl_tree(e, &ubi->used)) {
			self_check_in_wl_tree(ubi, e, &ubi->used);
			rb_erase(&e->u.rb, &ubi->used);
		} else if (in_wl_tree(e, &ubi->scrub)) {
			self_check_in_wl_tree(ubi, e, &ubi->scrub);
			rb_erase(&e->u.rb, &ubi->scrub);
		} else if (in_wl_tree(e, &ubi->erroneous)) {
			self_check_in_wl_tree(ubi, e, &ubi->erroneous);
			rb_erase(&e->u.rb, &ubi->erroneous);
			ubi->erroneous_peb_count -= 1;
			ubi_assert(ubi->erroneous_peb_count >= 0);
			/* Erroneous PEBs should be tortured */
			torture = 1;
		} else {
			err = prot_queue_del(ubi, e->pnum);
			if (err) {
				ubi_err(ubi, "PEB %d not found", pnum);
				ubi_ro_mode(ubi);
				spin_unlock(&ubi->wl_lock);
				up_read(&ubi->fm_protect);
				return err;
			}
		}
	}
	spin_unlock(&ubi->wl_lock);

	err = schedule_erase(ubi, e, vol_id, lnum, torture, false);
	if (err) {
		spin_lock(&ubi->wl_lock);
		wl_tree_add(e, &ubi->used);
		spin_unlock(&ubi->wl_lock);
	}

	up_read(&ubi->fm_protect);
	return err;
}

/**
 * ubi_wl_scrub_peb - schedule a physical eraseblock for scrubbing.
 * @ubi: UBI device description object
 * @pnum: the physical eraseblock to schedule
 *
 * If a bit-flip in a physical eraseblock is detected, this physical eraseblock
 * needs scrubbing. This function schedules a physical eraseblock for
 * scrubbing which is done in background. This function returns zero in case of
 * success and a negative error code in case of failure.
 */
int ubi_wl_scrub_peb(struct ubi_device *ubi, int pnum)
{
	struct ubi_wl_entry *e;

	ubi_msg(ubi, "schedule PEB %d for scrubbing", pnum);

retry:
	spin_lock(&ubi->wl_lock);
	e = ubi->lookuptbl[pnum];
	if (e == ubi->move_from || in_wl_tree(e, &ubi->scrub) ||
	    in_wl_tree(e, &ubi->erroneous)) {
		spin_unlock(&ubi->wl_lock);
		return 0;
	}

	if (e == ubi->move_to) {
		/*
		 * This physical eraseblock was used to move data to. The data
		 * was moved but the PEB was not yet inserted to the proper
		 * tree. We should just wait a little and let the WL worker
		 * proceed.
		 */
		spin_unlock(&ubi->wl_lock);
		dbg_wl("the PEB %d is not in proper tree, retry", pnum);
		yield();
		goto retry;
	}

	if (in_wl_tree(e, &ubi->used)) {
		self_check_in_wl_tree(ubi, e, &ubi->used);
		rb_erase(&e->u.rb, &ubi->used);
	} else {
		int err;

		err = prot_queue_del(ubi, e->pnum);
		if (err) {
			ubi_err(ubi, "PEB %d not found", pnum);
			ubi_ro_mode(ubi);
			spin_unlock(&ubi->wl_lock);
			return err;
		}
	}

	wl_tree_add(e, &ubi->scrub);
	spin_unlock(&ubi->wl_lock);

	/*
	 * Technically scrubbing is the same as wear-leveling, so it is done
	 * by the WL worker.
	 */
	return ensure_wear_leveling(ubi, 0);
}

/**
 * ubi_wl_flush - flush all pending works.
 * @ubi: UBI device description object
 * @vol_id: the volume id to flush for
 * @lnum: the logical eraseblock number to flush for
 *
 * This function executes all pending works for a particular volume id /
 * logical eraseblock number pair. If either value is set to %UBI_ALL, then it
 * acts as a wildcard for all of the corresponding volume numbers or logical
 * eraseblock numbers. It returns zero in case of success and a negative error
 * code in case of failure.
 */
int ubi_wl_flush(struct ubi_device *ubi, int vol_id, int lnum)
{
	int err = 0;
	int found = 1;

	/*
	 * Erase while the pending works queue is not empty, but not more than
	 * the number of currently pending works.
	 */
	dbg_wl("flush pending work for LEB %d:%d (%d pending works)",
	       vol_id, lnum, ubi->works_count);

	while (found) {
		struct ubi_work *wrk, *tmp;
		found = 0;

		down_read(&ubi->work_sem);
		spin_lock(&ubi->wl_lock);
		list_for_each_entry_safe(wrk, tmp, &ubi->works, list) {
			if ((vol_id == UBI_ALL || wrk->vol_id == vol_id) &&
			    (lnum == UBI_ALL || wrk->lnum == lnum)) {
				list_del(&wrk->list);
				ubi->works_count -= 1;
				ubi_assert(ubi->works_count >= 0);
				spin_unlock(&ubi->wl_lock);

				err = wrk->func(ubi, wrk, 0);
				if (err) {
					up_read(&ubi->work_sem);
					return err;
				}

				spin_lock(&ubi->wl_lock);
				found = 1;
				break;
			}
		}
		spin_unlock(&ubi->wl_lock);
		up_read(&ubi->work_sem);
	}

	/*
	 * Make sure all the works which have been done in parallel are
	 * finished.
	 */
	down_write(&ubi->work_sem);
	up_write(&ubi->work_sem);

	return err;
}
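
/*
 * The empty down_write()/up_write() pair above is a barrier idiom: every
 * worker holds @ubi->work_sem for reading while executing (see 'do_work()'),
 * so acquiring it for writing cannot succeed until all works that were
 * already running in parallel have finished.
 */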

static bool scrub_possible(struct ubi_device *ubi, struct ubi_wl_entry *e)
{
	if (in_wl_tree(e, &ubi->scrub))
		return false;
	else if (in_wl_tree(e, &ubi->erroneous))
		return false;
	else if (ubi->move_from == e)
		return false;
	else if (ubi->move_to == e)
		return false;

	return true;
}

/**
 * ubi_bitflip_check - Check an eraseblock for bitflips and scrub it if needed.
 * @ubi: UBI device description object
 * @pnum: the physical eraseblock to schedule
 * @force: don't read the block, assume bitflips happened and take action.
 *
 * This function reads the given eraseblock and checks if bitflips occurred.
 * In case of bitflips, the eraseblock is scheduled for scrubbing.
 * If scrubbing is forced with @force, the eraseblock is not read,
 * but scheduled for scrubbing right away.
 *
 * Returns:
 * %EINVAL, PEB is out of range
 * %ENOENT, PEB is no longer used by UBI
 * %EBUSY, PEB cannot be checked now or a check is currently running on it
 * %EAGAIN, bit flips happened but scrubbing is currently not possible
 * %EUCLEAN, bit flips happened and PEB is scheduled for scrubbing
 * %0, no bit flips detected
 */
1513 | int ubi_bitflip_check(struct ubi_device *ubi, int pnum, int force) |
1514 | { |
1515 | int err = 0; |
1516 | struct ubi_wl_entry *e; |
1517 | |
1518 | if (pnum < 0 || pnum >= ubi->peb_count) { |
1519 | err = -EINVAL; |
1520 | goto out; |
1521 | } |
1522 | |
1523 | /* |
1524 | * Pause all parallel work, otherwise it can happen that the |
1525 | * erase worker frees a wl entry under us. |
1526 | */ |
1527 | down_write(sem: &ubi->work_sem); |
1528 | |
1529 | /* |
1530 | * Make sure that the wl entry does not change state while |
1531 | * inspecting it. |
1532 | */ |
1533 | spin_lock(lock: &ubi->wl_lock); |
1534 | e = ubi->lookuptbl[pnum]; |
1535 | if (!e) { |
1536 | spin_unlock(lock: &ubi->wl_lock); |
1537 | err = -ENOENT; |
1538 | goto out_resume; |
1539 | } |
1540 | |
1541 | /* |
1542 | * Does it make sense to check this PEB? |
1543 | */ |
1544 | if (!scrub_possible(ubi, e)) { |
1545 | spin_unlock(lock: &ubi->wl_lock); |
1546 | err = -EBUSY; |
1547 | goto out_resume; |
1548 | } |
1549 | spin_unlock(lock: &ubi->wl_lock); |
1550 | |
1551 | if (!force) { |
1552 | mutex_lock(&ubi->buf_mutex); |
1553 | err = ubi_io_read(ubi, buf: ubi->peb_buf, pnum, offset: 0, len: ubi->peb_size); |
1554 | mutex_unlock(lock: &ubi->buf_mutex); |
1555 | } |
1556 | |
1557 | if (force || err == UBI_IO_BITFLIPS) { |
1558 | /* |
1559 | * Okay, bit flip happened, let's figure out what we can do. |
1560 | */ |
1561 | spin_lock(lock: &ubi->wl_lock); |
1562 | |
1563 | /* |
1564 | * Recheck. We released wl_lock, UBI might have killed the |
1565 | * wl entry under us. |
1566 | */ |
1567 | e = ubi->lookuptbl[pnum]; |
1568 | if (!e) { |
1569 | spin_unlock(lock: &ubi->wl_lock); |
1570 | err = -ENOENT; |
1571 | goto out_resume; |
1572 | } |
1573 | |
1574 | /* |
1575 | * Need to re-check state |
1576 | */ |
1577 | if (!scrub_possible(ubi, e)) { |
1578 | spin_unlock(lock: &ubi->wl_lock); |
1579 | err = -EBUSY; |
1580 | goto out_resume; |
1581 | } |
1582 | |
1583 | if (in_pq(ubi, e)) { |
1584 | prot_queue_del(ubi, pnum: e->pnum); |
1585 | wl_tree_add(e, root: &ubi->scrub); |
1586 | spin_unlock(lock: &ubi->wl_lock); |
1587 | |
1588 | err = ensure_wear_leveling(ubi, nested: 1); |
1589 | } else if (in_wl_tree(e, root: &ubi->used)) { |
1590 | rb_erase(&e->u.rb, &ubi->used); |
1591 | wl_tree_add(e, root: &ubi->scrub); |
1592 | spin_unlock(lock: &ubi->wl_lock); |
1593 | |
1594 | err = ensure_wear_leveling(ubi, nested: 1); |
1595 | } else if (in_wl_tree(e, root: &ubi->free)) { |
1596 | rb_erase(&e->u.rb, &ubi->free); |
1597 | ubi->free_count--; |
1598 | spin_unlock(lock: &ubi->wl_lock); |
1599 | |
1600 | /* |
1601 | * This PEB is empty we can schedule it for |
1602 | * erasure right away. No wear leveling needed. |
1603 | */ |
1604 | err = schedule_erase(ubi, e, UBI_UNKNOWN, UBI_UNKNOWN, |
1605 | torture: force ? 0 : 1, nested: true); |
1606 | } else { |
			spin_unlock(&ubi->wl_lock);
1608 | err = -EAGAIN; |
1609 | } |
1610 | |
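		/*
		 * If the bit-flips were detected by the read above (i.e. not
		 * forced), report -EUCLEAN so the caller knows scrubbing has
		 * been scheduled for this PEB.
		 */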
1611 | if (!err && !force) |
1612 | err = -EUCLEAN; |
1613 | } else { |
1614 | err = 0; |
1615 | } |
1616 | |
1617 | out_resume: |
	up_write(&ubi->work_sem);
out:
	return err;
1622 | } |
1623 | |
1624 | /** |
1625 | * tree_destroy - destroy an RB-tree. |
1626 | * @ubi: UBI device description object |
1627 | * @root: the root of the tree to destroy |
1628 | */ |
1629 | static void tree_destroy(struct ubi_device *ubi, struct rb_root *root) |
1630 | { |
1631 | struct rb_node *rb; |
1632 | struct ubi_wl_entry *e; |
1633 | |
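	/*
	 * Iterative post-order walk: descend to a leaf, unlink it from its
	 * parent, free it, and continue from the parent. No recursion and no
	 * tree rebalancing are needed for destruction.
	 */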
1634 | rb = root->rb_node; |
1635 | while (rb) { |
1636 | if (rb->rb_left) |
1637 | rb = rb->rb_left; |
1638 | else if (rb->rb_right) |
1639 | rb = rb->rb_right; |
1640 | else { |
1641 | e = rb_entry(rb, struct ubi_wl_entry, u.rb); |
1642 | |
1643 | rb = rb_parent(rb); |
1644 | if (rb) { |
1645 | if (rb->rb_left == &e->u.rb) |
1646 | rb->rb_left = NULL; |
1647 | else |
1648 | rb->rb_right = NULL; |
1649 | } |
1650 | |
1651 | wl_entry_destroy(ubi, e); |
1652 | } |
1653 | } |
1654 | } |
1655 | |
1656 | /** |
1657 | * ubi_thread - UBI background thread. |
1658 | * @u: the UBI device description object pointer |
1659 | */ |
1660 | int ubi_thread(void *u) |
1661 | { |
1662 | int failures = 0; |
1663 | struct ubi_device *ubi = u; |
1664 | |
	ubi_msg(ubi, "background thread \"%s\" started, PID %d",
1666 | ubi->bgt_name, task_pid_nr(current)); |
1667 | |
1668 | set_freezable(); |
1669 | for (;;) { |
1670 | int err; |
1671 | |
1672 | if (kthread_should_stop()) |
1673 | break; |
1674 | |
1675 | if (try_to_freeze()) |
1676 | continue; |
1677 | |
		spin_lock(&ubi->wl_lock);
		if (list_empty(&ubi->works) || ubi->ro_mode ||
1680 | !ubi->thread_enabled || ubi_dbg_is_bgt_disabled(ubi)) { |
1681 | set_current_state(TASK_INTERRUPTIBLE); |
			spin_unlock(&ubi->wl_lock);
1683 | |
1684 | /* |
1685 | * Check kthread_should_stop() after we set the task |
1686 | * state to guarantee that we either see the stop bit |
1687 | * and exit or the task state is reset to runnable such |
1688 | * that it's not scheduled out indefinitely and detects |
1689 | * the stop bit at kthread_should_stop(). |
1690 | */ |
1691 | if (kthread_should_stop()) { |
1692 | set_current_state(TASK_RUNNING); |
1693 | break; |
1694 | } |
1695 | |
1696 | schedule(); |
1697 | continue; |
1698 | } |
		spin_unlock(&ubi->wl_lock);
1700 | |
1701 | err = do_work(ubi, NULL); |
1702 | if (err) { |
			ubi_err(ubi, "%s: work failed with error code %d",
1704 | ubi->bgt_name, err); |
1705 | if (failures++ > WL_MAX_FAILURES) { |
1706 | /* |
1707 | * Too many failures, disable the thread and |
1708 | * switch to read-only mode. |
1709 | */ |
				ubi_msg(ubi, "%s: %d consecutive failures",
1711 | ubi->bgt_name, WL_MAX_FAILURES); |
1712 | ubi_ro_mode(ubi); |
1713 | ubi->thread_enabled = 0; |
1714 | continue; |
1715 | } |
1716 | } else |
1717 | failures = 0; |
1718 | |
1719 | cond_resched(); |
1720 | } |
1721 | |
	dbg_wl("background thread \"%s\" is killed", ubi->bgt_name);
1723 | ubi->thread_enabled = 0; |
1724 | return 0; |
1725 | } |
1726 | |
1727 | /** |
1728 | * shutdown_work - shutdown all pending works. |
1729 | * @ubi: UBI device description object |
1730 | */ |
1731 | static void shutdown_work(struct ubi_device *ubi) |
1732 | { |
	while (!list_empty(&ubi->works)) {
1734 | struct ubi_work *wrk; |
1735 | |
1736 | wrk = list_entry(ubi->works.next, struct ubi_work, list); |
		list_del(&wrk->list);
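		/*
		 * Run the worker in shutdown mode (last argument is 1), which
		 * makes it release its resources instead of performing the
		 * actual work.
		 */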
1738 | wrk->func(ubi, wrk, 1); |
1739 | ubi->works_count -= 1; |
1740 | ubi_assert(ubi->works_count >= 0); |
1741 | } |
1742 | } |
1743 | |
1744 | /** |
 * erase_aeb - erase a PEB described by a UBI attach info PEB
1746 | * @ubi: UBI device description object |
1747 | * @aeb: UBI attach info PEB |
1748 | * @sync: If true, erase synchronously. Otherwise schedule for erasure |
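 *
 * Returns zero in case of success and a negative error code in case of
 * failure.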
1749 | */ |
1750 | static int erase_aeb(struct ubi_device *ubi, struct ubi_ainf_peb *aeb, bool sync) |
1751 | { |
1752 | struct ubi_wl_entry *e; |
1753 | int err; |
1754 | |
	e = kmem_cache_alloc(ubi_wl_entry_slab, GFP_KERNEL);
1756 | if (!e) |
1757 | return -ENOMEM; |
1758 | |
1759 | e->pnum = aeb->pnum; |
1760 | e->ec = aeb->ec; |
1761 | ubi->lookuptbl[e->pnum] = e; |
1762 | |
1763 | if (sync) { |
		err = ubi_sync_erase(ubi, e, false);
1765 | if (err) |
1766 | goto out_free; |
1767 | |
		wl_tree_add(e, &ubi->free);
1769 | ubi->free_count++; |
1770 | } else { |
		err = schedule_erase(ubi, e, aeb->vol_id, aeb->lnum, 0, false);
1772 | if (err) |
1773 | goto out_free; |
1774 | } |
1775 | |
1776 | return 0; |
1777 | |
1778 | out_free: |
1779 | wl_entry_destroy(ubi, e); |
1780 | |
1781 | return err; |
1782 | } |
1783 | |
1784 | /** |
1785 | * ubi_wl_init - initialize the WL sub-system using attaching information. |
1786 | * @ubi: UBI device description object |
1787 | * @ai: attaching information |
1788 | * |
1789 | * This function returns zero in case of success, and a negative error code in |
1790 | * case of failure. |
1791 | */ |
1792 | int ubi_wl_init(struct ubi_device *ubi, struct ubi_attach_info *ai) |
1793 | { |
1794 | int err, i, reserved_pebs, found_pebs = 0; |
1795 | struct rb_node *rb1, *rb2; |
1796 | struct ubi_ainf_volume *av; |
1797 | struct ubi_ainf_peb *aeb, *tmp; |
1798 | struct ubi_wl_entry *e; |
1799 | |
1800 | ubi->used = ubi->erroneous = ubi->free = ubi->scrub = RB_ROOT; |
1801 | spin_lock_init(&ubi->wl_lock); |
1802 | mutex_init(&ubi->move_mutex); |
1803 | init_rwsem(&ubi->work_sem); |
1804 | ubi->max_ec = ai->max_ec; |
	INIT_LIST_HEAD(&ubi->works);
1806 | |
	sprintf(ubi->bgt_name, UBI_BGT_NAME_PATTERN, ubi->ubi_num);
1808 | |
1809 | err = -ENOMEM; |
	ubi->lookuptbl = kcalloc(ubi->peb_count, sizeof(void *), GFP_KERNEL);
1811 | if (!ubi->lookuptbl) |
1812 | return err; |
1813 | |
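	/*
	 * Initialize the protection queue: one list head per (global) erase
	 * cycle of protection; entries age out as @pq_head advances.
	 */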
1814 | for (i = 0; i < UBI_PROT_QUEUE_LEN; i++) |
		INIT_LIST_HEAD(&ubi->pq[i]);
1816 | ubi->pq_head = 0; |
1817 | |
1818 | ubi->free_count = 0; |
1819 | list_for_each_entry_safe(aeb, tmp, &ai->erase, u.list) { |
1820 | cond_resched(); |
1821 | |
		err = erase_aeb(ubi, aeb, false);
1823 | if (err) |
1824 | goto out_free; |
1825 | |
1826 | found_pebs++; |
1827 | } |
1828 | |
1829 | list_for_each_entry(aeb, &ai->free, u.list) { |
1830 | cond_resched(); |
1831 | |
		e = kmem_cache_alloc(ubi_wl_entry_slab, GFP_KERNEL);
1833 | if (!e) { |
1834 | err = -ENOMEM; |
1835 | goto out_free; |
1836 | } |
1837 | |
1838 | e->pnum = aeb->pnum; |
1839 | e->ec = aeb->ec; |
1840 | ubi_assert(e->ec >= 0); |
1841 | |
		wl_tree_add(e, &ubi->free);
1843 | ubi->free_count++; |
1844 | |
1845 | ubi->lookuptbl[e->pnum] = e; |
1846 | |
1847 | found_pebs++; |
1848 | } |
1849 | |
1850 | ubi_rb_for_each_entry(rb1, av, &ai->volumes, rb) { |
1851 | ubi_rb_for_each_entry(rb2, aeb, &av->root, u.rb) { |
1852 | cond_resched(); |
1853 | |
			e = kmem_cache_alloc(ubi_wl_entry_slab, GFP_KERNEL);
1855 | if (!e) { |
1856 | err = -ENOMEM; |
1857 | goto out_free; |
1858 | } |
1859 | |
1860 | e->pnum = aeb->pnum; |
1861 | e->ec = aeb->ec; |
1862 | ubi->lookuptbl[e->pnum] = e; |
1863 | |
1864 | if (!aeb->scrub) { |
				dbg_wl("add PEB %d EC %d to the used tree",
				       e->pnum, e->ec);
				wl_tree_add(e, &ubi->used);
1868 | } else { |
				dbg_wl("add PEB %d EC %d to the scrub tree",
				       e->pnum, e->ec);
				wl_tree_add(e, &ubi->scrub);
1872 | } |
1873 | |
1874 | found_pebs++; |
1875 | } |
1876 | } |
1877 | |
1878 | list_for_each_entry(aeb, &ai->fastmap, u.list) { |
1879 | cond_resched(); |
1880 | |
		e = ubi_find_fm_block(ubi, aeb->pnum);
1882 | |
1883 | if (e) { |
1884 | ubi_assert(!ubi->lookuptbl[e->pnum]); |
1885 | ubi->lookuptbl[e->pnum] = e; |
1886 | } else { |
1887 | bool sync = false; |
1888 | |
1889 | /* |
1890 | * Usually old Fastmap PEBs are scheduled for erasure |
1891 | * and we don't have to care about them but if we face |
1892 | * an power cut before scheduling them we need to |
1893 | * take care of them here. |
1894 | */ |
1895 | if (ubi->lookuptbl[aeb->pnum]) |
1896 | continue; |
1897 | |
1898 | /* |
1899 | * The fastmap update code might not find a free PEB for |
1900 | * writing the fastmap anchor to and then reuses the |
1901 | * current fastmap anchor PEB. When this PEB gets erased |
1902 | * and a power cut happens before it is written again we |
1903 | * must make sure that the fastmap attach code doesn't |
1904 | * find any outdated fastmap anchors, hence we erase the |
1905 | * outdated fastmap anchor PEBs synchronously here. |
1906 | */ |
1907 | if (aeb->vol_id == UBI_FM_SB_VOLUME_ID) |
1908 | sync = true; |
1909 | |
1910 | err = erase_aeb(ubi, aeb, sync); |
1911 | if (err) |
1912 | goto out_free; |
1913 | } |
1914 | |
1915 | found_pebs++; |
1916 | } |
1917 | |
	dbg_wl("found %i PEBs", found_pebs);
1919 | |
1920 | ubi_assert(ubi->good_peb_count == found_pebs); |
1921 | |
1922 | reserved_pebs = WL_RESERVED_PEBS; |
	ubi_fastmap_init(ubi, &reserved_pebs);
1924 | |
1925 | if (ubi->avail_pebs < reserved_pebs) { |
		ubi_err(ubi, "not enough physical eraseblocks (%d, need %d)",
			ubi->avail_pebs, reserved_pebs);
		if (ubi->corr_peb_count)
			ubi_err(ubi, "%d PEBs are corrupted and not used",
1930 | ubi->corr_peb_count); |
1931 | err = -ENOSPC; |
1932 | goto out_free; |
1933 | } |
1934 | ubi->avail_pebs -= reserved_pebs; |
1935 | ubi->rsvd_pebs += reserved_pebs; |
1936 | |
1937 | /* Schedule wear-leveling if needed */ |
	err = ensure_wear_leveling(ubi, 0);
1939 | if (err) |
1940 | goto out_free; |
1941 | |
1942 | #ifdef CONFIG_MTD_UBI_FASTMAP |
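	/*
	 * Make sure anchor PEBs for the next fastmap write are available;
	 * this only makes sense if the device is writable and fastmap is not
	 * disabled.
	 */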
1943 | if (!ubi->ro_mode && !ubi->fm_disabled) |
1944 | ubi_ensure_anchor_pebs(ubi); |
1945 | #endif |
1946 | return 0; |
1947 | |
1948 | out_free: |
1949 | shutdown_work(ubi); |
	tree_destroy(ubi, &ubi->used);
	tree_destroy(ubi, &ubi->free);
	tree_destroy(ubi, &ubi->scrub);
	kfree(ubi->lookuptbl);
1954 | return err; |
1955 | } |
1956 | |
1957 | /** |
1958 | * protection_queue_destroy - destroy the protection queue. |
1959 | * @ubi: UBI device description object |
1960 | */ |
1961 | static void protection_queue_destroy(struct ubi_device *ubi) |
1962 | { |
1963 | int i; |
1964 | struct ubi_wl_entry *e, *tmp; |
1965 | |
1966 | for (i = 0; i < UBI_PROT_QUEUE_LEN; ++i) { |
1967 | list_for_each_entry_safe(e, tmp, &ubi->pq[i], u.list) { |
			list_del(&e->u.list);
1969 | wl_entry_destroy(ubi, e); |
1970 | } |
1971 | } |
1972 | } |
1973 | |
1974 | /** |
1975 | * ubi_wl_close - close the wear-leveling sub-system. |
1976 | * @ubi: UBI device description object |
1977 | */ |
1978 | void ubi_wl_close(struct ubi_device *ubi) |
1979 | { |
	dbg_wl("close the WL sub-system");
1981 | ubi_fastmap_close(ubi); |
1982 | shutdown_work(ubi); |
1983 | protection_queue_destroy(ubi); |
	tree_destroy(ubi, &ubi->used);
	tree_destroy(ubi, &ubi->erroneous);
	tree_destroy(ubi, &ubi->free);
	tree_destroy(ubi, &ubi->scrub);
	kfree(ubi->lookuptbl);
1989 | } |
1990 | |
1991 | /** |
1992 | * self_check_ec - make sure that the erase counter of a PEB is correct. |
1993 | * @ubi: UBI device description object |
1994 | * @pnum: the physical eraseblock number to check |
1995 | * @ec: the erase counter to check |
1996 | * |
1997 | * This function returns zero if the erase counter of physical eraseblock @pnum |
1998 | * is equivalent to @ec, and a negative error code if not or if an error |
1999 | * occurred. |
2000 | */ |
2001 | static int self_check_ec(struct ubi_device *ubi, int pnum, int ec) |
2002 | { |
2003 | int err; |
2004 | long long read_ec; |
2005 | struct ubi_ec_hdr *ec_hdr; |
2006 | |
2007 | if (!ubi_dbg_chk_gen(ubi)) |
2008 | return 0; |
2009 | |
	ec_hdr = kzalloc(ubi->ec_hdr_alsize, GFP_NOFS);
2011 | if (!ec_hdr) |
2012 | return -ENOMEM; |
2013 | |
	err = ubi_io_read_ec_hdr(ubi, pnum, ec_hdr, 0);
2015 | if (err && err != UBI_IO_BITFLIPS) { |
2016 | /* The header does not have to exist */ |
2017 | err = 0; |
2018 | goto out_free; |
2019 | } |
2020 | |
2021 | read_ec = be64_to_cpu(ec_hdr->ec); |
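	/*
	 * Only complain if the counter read from flash is ahead of the
	 * in-memory one by more than one erase cycle; a difference of a
	 * single cycle is tolerated, presumably because the in-memory
	 * counter can be slightly stale (e.g. after a fastmap attach).
	 */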
2022 | if (ec != read_ec && read_ec - ec > 1) { |
		ubi_err(ubi, "self-check failed for PEB %d", pnum);
		ubi_err(ubi, "read EC is %lld, should be %d", read_ec, ec);
2025 | dump_stack(); |
2026 | err = 1; |
2027 | } else |
2028 | err = 0; |
2029 | |
2030 | out_free: |
	kfree(ec_hdr);
2032 | return err; |
2033 | } |
2034 | |
2035 | /** |
2036 | * self_check_in_wl_tree - check that wear-leveling entry is in WL RB-tree. |
2037 | * @ubi: UBI device description object |
2038 | * @e: the wear-leveling entry to check |
2039 | * @root: the root of the tree |
2040 | * |
2041 | * This function returns zero if @e is in the @root RB-tree and %-EINVAL if it |
2042 | * is not. |
2043 | */ |
2044 | static int self_check_in_wl_tree(const struct ubi_device *ubi, |
2045 | struct ubi_wl_entry *e, struct rb_root *root) |
2046 | { |
2047 | if (!ubi_dbg_chk_gen(ubi)) |
2048 | return 0; |
2049 | |
2050 | if (in_wl_tree(e, root)) |
2051 | return 0; |
2052 | |
	ubi_err(ubi, "self-check failed for PEB %d, EC %d, RB-tree %p",
2054 | e->pnum, e->ec, root); |
2055 | dump_stack(); |
2056 | return -EINVAL; |
2057 | } |
2058 | |
2059 | /** |
2060 | * self_check_in_pq - check if wear-leveling entry is in the protection |
2061 | * queue. |
2062 | * @ubi: UBI device description object |
2063 | * @e: the wear-leveling entry to check |
2064 | * |
2065 | * This function returns zero if @e is in @ubi->pq and %-EINVAL if it is not. |
2066 | */ |
2067 | static int self_check_in_pq(const struct ubi_device *ubi, |
2068 | struct ubi_wl_entry *e) |
2069 | { |
2070 | if (!ubi_dbg_chk_gen(ubi)) |
2071 | return 0; |
2072 | |
2073 | if (in_pq(ubi, e)) |
2074 | return 0; |
2075 | |
	ubi_err(ubi, "self-check failed for PEB %d, EC %d, Protect queue",
2077 | e->pnum, e->ec); |
2078 | dump_stack(); |
2079 | return -EINVAL; |
2080 | } |
2081 | #ifndef CONFIG_MTD_UBI_FASTMAP |
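/**
 * get_peb_for_wl - pick a free PEB and remove it from the free tree.
 * @ubi: UBI device description object
 *
 * The entry is selected by find_wl_entry() with a maximum erase-counter
 * spread of %WL_FREE_MAX_DIFF. The caller must hold @ubi->wl_lock.
 */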
2082 | static struct ubi_wl_entry *get_peb_for_wl(struct ubi_device *ubi) |
2083 | { |
2084 | struct ubi_wl_entry *e; |
2085 | |
2086 | e = find_wl_entry(ubi, &ubi->free, WL_FREE_MAX_DIFF, 0); |
2087 | self_check_in_wl_tree(ubi, e, &ubi->free); |
2088 | ubi->free_count--; |
2089 | ubi_assert(ubi->free_count >= 0); |
2090 | rb_erase(&e->u.rb, &ubi->free); |
2091 | |
2092 | return e; |
2093 | } |
2094 | |
2095 | /** |
2096 | * produce_free_peb - produce a free physical eraseblock. |
2097 | * @ubi: UBI device description object |
2098 | * |
2099 | * This function tries to make a free PEB by means of synchronous execution of |
 * pending works. This may be needed if, for example, the background thread is
2101 | * disabled. Returns zero in case of success and a negative error code in case |
2102 | * of failure. |
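 *
 * The caller must hold @ubi->wl_lock; the lock is dropped and re-taken
 * around each synchronously executed work item.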
2103 | */ |
2104 | static int produce_free_peb(struct ubi_device *ubi) |
2105 | { |
2106 | int err; |
2107 | |
2108 | while (!ubi->free.rb_node && ubi->works_count) { |
2109 | spin_unlock(&ubi->wl_lock); |
2110 | |
		dbg_wl("do one work synchronously");
2112 | err = do_work(ubi, NULL); |
2113 | |
2114 | spin_lock(&ubi->wl_lock); |
2115 | if (err) |
2116 | return err; |
2117 | } |
2118 | |
2119 | return 0; |
2120 | } |
2121 | |
2122 | /** |
2123 | * ubi_wl_get_peb - get a physical eraseblock. |
2124 | * @ubi: UBI device description object |
2125 | * |
2126 | * This function returns a physical eraseblock in case of success and a |
2127 | * negative error code in case of failure. |
2128 | * Returns with ubi->fm_eba_sem held in read mode! |
2129 | */ |
2130 | int ubi_wl_get_peb(struct ubi_device *ubi) |
2131 | { |
2132 | int err; |
2133 | struct ubi_wl_entry *e; |
2134 | |
2135 | retry: |
2136 | down_read(&ubi->fm_eba_sem); |
2137 | spin_lock(&ubi->wl_lock); |
2138 | if (!ubi->free.rb_node) { |
2139 | if (ubi->works_count == 0) { |
			ubi_err(ubi, "no free eraseblocks");
2141 | ubi_assert(list_empty(&ubi->works)); |
2142 | spin_unlock(&ubi->wl_lock); |
2143 | return -ENOSPC; |
2144 | } |
2145 | |
2146 | err = produce_free_peb(ubi); |
2147 | if (err < 0) { |
2148 | spin_unlock(&ubi->wl_lock); |
2149 | return err; |
2150 | } |
2151 | spin_unlock(&ubi->wl_lock); |
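		/*
		 * Release @fm_eba_sem before retrying; it is re-taken in read
		 * mode at the top of the retry loop.
		 */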
2152 | up_read(&ubi->fm_eba_sem); |
		goto retry;
	}
2156 | e = wl_get_wle(ubi); |
2157 | prot_queue_add(ubi, e); |
2158 | spin_unlock(&ubi->wl_lock); |
2159 | |
2160 | err = ubi_self_check_all_ff(ubi, e->pnum, ubi->vid_hdr_aloffset, |
2161 | ubi->peb_size - ubi->vid_hdr_aloffset); |
2162 | if (err) { |
		ubi_err(ubi, "new PEB %d does not contain all 0xFF bytes", e->pnum);
2164 | return err; |
2165 | } |
2166 | |
2167 | return e->pnum; |
2168 | } |
2169 | #else |
2170 | #include "fastmap-wl.c" |
2171 | #endif |
2172 | |