// SPDX-License-Identifier: GPL-2.0

#include <linux/jiffies.h>
#include <linux/kernel.h>
#include <linux/ktime.h>
#include <linux/list.h>
#include <linux/math64.h>
#include <linux/sizes.h>
#include <linux/workqueue.h>
#include "ctree.h"
#include "block-group.h"
#include "discard.h"
#include "free-space-cache.h"
#include "fs.h"

/*
 * This contains the logic to handle async discard.
 *
 * Async discard manages trimming of free space outside of transaction commit.
 * Discarding is done by managing the block_groups on an LRU list based on free
 * space recency. Two passes are used to first prioritize discarding extents
 * and then to give trimming of the bitmaps the best opportunity to coalesce.
 * The block_groups are maintained on multiple lists to allow for multiple
 * passes with different discard filter requirements. A delayed work item is
 * used to manage discarding with a timeout determined by the max of the delay
 * incurred by the iops rate limit, the byte rate limit, and the max delay of
 * BTRFS_DISCARD_MAX_DELAY_MSEC.
 *
 * Note, this only keeps track of block_groups that are explicitly for data.
 * Mixed block_groups are not supported.
 *
 * The first list is special to manage discarding of fully free block groups.
 * This is necessary because we issue a final trim for a fully free block group
 * after forgetting it. When a block group becomes unused, instead of directly
 * being added to the unused_bgs list, we add it to this first list. Then
 * from there, if it becomes fully discarded, we place it onto the unused_bgs
 * list.
 *
 * The in-memory free space cache serves as the backing state for discard.
 * Consequently this means there is no persistence. We opt to load all the
 * block groups in as not discarded, so the mount case degenerates to the
 * crashing case.
 *
 * As the free space cache uses bitmaps, there exists a tradeoff between
 * ease/efficiency for find_free_extent() and the accuracy of discard state.
 * Here we opt to let untrimmed regions merge with everything while only letting
 * trimmed regions merge with other trimmed regions. This can cause
 * overtrimming, but the coalescing benefit seems to be worth it. Additionally,
 * bitmap state is tracked as a whole. If we're able to fully trim a bitmap,
 * the trimmed flag is set on the bitmap. Otherwise, if an allocation comes in,
 * this resets the state and we will retry trimming the whole bitmap. This is a
 * tradeoff between discard state accuracy and the cost of accounting.
 */

/* This is an initial delay to give some chance for block reuse */
#define BTRFS_DISCARD_DELAY		(120ULL * NSEC_PER_SEC)
#define BTRFS_DISCARD_UNUSED_DELAY	(10ULL * NSEC_PER_SEC)

#define BTRFS_DISCARD_MIN_DELAY_MSEC	(1UL)
#define BTRFS_DISCARD_MAX_DELAY_MSEC	(1000UL)
#define BTRFS_DISCARD_MAX_IOPS		(1000U)

/* Monotonically decreasing minimum length filters after index 0 */
static int discard_minlen[BTRFS_NR_DISCARD_LISTS] = {
	0,
	BTRFS_ASYNC_DISCARD_MAX_FILTER,
	BTRFS_ASYNC_DISCARD_MIN_FILTER
};

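/* Return the discard list that @block_group is currently assigned to. */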
static struct list_head *get_discard_list(struct btrfs_discard_ctl *discard_ctl,
					  struct btrfs_block_group *block_group)
{
	return &discard_ctl->discard_list[block_group->discard_index];
}

/*
 * Determine if async discard should be running.
 *
 * @discard_ctl: discard control
 *
 * Check if the file system is writeable and BTRFS_FS_DISCARD_RUNNING is set.
 */
static bool btrfs_run_discard_work(struct btrfs_discard_ctl *discard_ctl)
{
	struct btrfs_fs_info *fs_info = container_of(discard_ctl,
						     struct btrfs_fs_info,
						     discard_ctl);

	return (!(fs_info->sb->s_flags & SB_RDONLY) &&
		test_bit(BTRFS_FS_DISCARD_RUNNING, &fs_info->flags));
}

static void __add_to_discard_list(struct btrfs_discard_ctl *discard_ctl,
				  struct btrfs_block_group *block_group)
{
	lockdep_assert_held(&discard_ctl->lock);
	if (!btrfs_run_discard_work(discard_ctl))
		return;

	if (list_empty(&block_group->discard_list) ||
	    block_group->discard_index == BTRFS_DISCARD_INDEX_UNUSED) {
		if (block_group->discard_index == BTRFS_DISCARD_INDEX_UNUSED)
			block_group->discard_index = BTRFS_DISCARD_INDEX_START;
		block_group->discard_eligible_time = (ktime_get_ns() +
						      BTRFS_DISCARD_DELAY);
		block_group->discard_state = BTRFS_DISCARD_RESET_CURSOR;
	}
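	/*
	 * Take a reference for the discard list.  It is dropped when the
	 * block group is removed from the discard lists, or by the workfn
	 * if it is the block group currently being discarded.
	 */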
	if (list_empty(&block_group->discard_list))
		btrfs_get_block_group(block_group);

	list_move_tail(&block_group->discard_list,
		       get_discard_list(discard_ctl, block_group));
}

static void add_to_discard_list(struct btrfs_discard_ctl *discard_ctl,
				struct btrfs_block_group *block_group)
{
	if (!btrfs_is_block_group_data_only(block_group))
		return;

	spin_lock(&discard_ctl->lock);
	__add_to_discard_list(discard_ctl, block_group);
	spin_unlock(&discard_ctl->lock);
}

static void add_to_discard_unused_list(struct btrfs_discard_ctl *discard_ctl,
				       struct btrfs_block_group *block_group)
{
	bool queued;

	spin_lock(&discard_ctl->lock);

	queued = !list_empty(&block_group->discard_list);

	if (!btrfs_run_discard_work(discard_ctl)) {
		spin_unlock(&discard_ctl->lock);
		return;
	}

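	/*
	 * The block group may already sit on another discard list; drop it
	 * off that list and requeue it at the tail of the unused list.
	 */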
	list_del_init(&block_group->discard_list);

	block_group->discard_index = BTRFS_DISCARD_INDEX_UNUSED;
	block_group->discard_eligible_time = (ktime_get_ns() +
					      BTRFS_DISCARD_UNUSED_DELAY);
	block_group->discard_state = BTRFS_DISCARD_RESET_CURSOR;
	if (!queued)
		btrfs_get_block_group(block_group);
	list_add_tail(&block_group->discard_list,
		      &discard_ctl->discard_list[BTRFS_DISCARD_INDEX_UNUSED]);

	spin_unlock(&discard_ctl->lock);
}

static bool remove_from_discard_list(struct btrfs_discard_ctl *discard_ctl,
				     struct btrfs_block_group *block_group)
{
	bool running = false;
	bool queued = false;

	spin_lock(&discard_ctl->lock);

	if (block_group == discard_ctl->block_group) {
		running = true;
		discard_ctl->block_group = NULL;
	}

	block_group->discard_eligible_time = 0;
	queued = !list_empty(&block_group->discard_list);
	list_del_init(&block_group->discard_list);
	/*
	 * If the block group is currently running in the discard workfn, we
	 * don't want to deref it, since it's still being used by the workfn.
	 * The workfn will notice this case and deref the block group when it
	 * is finished.
	 */
	if (queued && !running)
		btrfs_put_block_group(block_group);

	spin_unlock(&discard_ctl->lock);

	return running;
}

/*
 * Find the block_group that's up next for discarding.
 *
 * @discard_ctl: discard control
 * @now: current time
 *
 * Iterate over the discard lists to find the next block_group up for
 * discarding, checking the discard_eligible_time of each block_group.
 */
static struct btrfs_block_group *find_next_block_group(
					struct btrfs_discard_ctl *discard_ctl,
					u64 now)
{
	struct btrfs_block_group *ret_block_group = NULL, *block_group;
	int i;

	for (i = 0; i < BTRFS_NR_DISCARD_LISTS; i++) {
		struct list_head *discard_list = &discard_ctl->discard_list[i];

		if (!list_empty(discard_list)) {
			block_group = list_first_entry(discard_list,
						       struct btrfs_block_group,
						       discard_list);

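			/*
			 * The lists are kept in LRU order, so the head of each
			 * list is its earliest entry.  Stop scanning once the
			 * best candidate so far is already eligible; otherwise
			 * keep the block group with the earliest
			 * discard_eligible_time.
			 */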
			if (!ret_block_group)
				ret_block_group = block_group;

			if (ret_block_group->discard_eligible_time < now)
				break;

			if (ret_block_group->discard_eligible_time >
			    block_group->discard_eligible_time)
				ret_block_group = block_group;
		}
	}

	return ret_block_group;
}

/*
 * Look up the next block group and set it for use.
 *
 * @discard_ctl: discard control
 * @discard_state: the discard_state of the block_group after state management
 * @discard_index: the discard_index of the block_group after state management
 * @now: time when discard was invoked, in ns
 *
 * Wrap find_next_block_group() and set the block_group to be in use.
 * @discard_state's control flow is managed here. Variables related to
 * @discard_state are reset here as needed (eg. @discard_cursor). @discard_state
 * and @discard_index are remembered as they may change while we're discarding,
 * but we want the discard to execute in the context determined here.
 */
static struct btrfs_block_group *peek_discard_list(
					struct btrfs_discard_ctl *discard_ctl,
					enum btrfs_discard_state *discard_state,
					int *discard_index, u64 now)
{
	struct btrfs_block_group *block_group;

	spin_lock(&discard_ctl->lock);
again:
	block_group = find_next_block_group(discard_ctl, now);

	if (block_group && now >= block_group->discard_eligible_time) {
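		/*
		 * A block group that was queued as unused but has since been
		 * allocated into no longer belongs on the unused list: requeue
		 * it on the regular lists if it is data only, otherwise drop
		 * it, then look for the next candidate.
		 */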
		if (block_group->discard_index == BTRFS_DISCARD_INDEX_UNUSED &&
		    block_group->used != 0) {
			if (btrfs_is_block_group_data_only(block_group)) {
				__add_to_discard_list(discard_ctl, block_group);
			} else {
				list_del_init(&block_group->discard_list);
				btrfs_put_block_group(block_group);
			}
			goto again;
		}
		if (block_group->discard_state == BTRFS_DISCARD_RESET_CURSOR) {
			block_group->discard_cursor = block_group->start;
			block_group->discard_state = BTRFS_DISCARD_EXTENTS;
		}
		discard_ctl->block_group = block_group;
	}
	if (block_group) {
		*discard_state = block_group->discard_state;
		*discard_index = block_group->discard_index;
	}
	spin_unlock(&discard_ctl->lock);

	return block_group;
}

/*
 * Update a block group's filters.
 *
 * @block_group: block group of interest
 * @bytes: recently freed region size after coalescing
 *
 * Async discard maintains multiple lists with progressively smaller filters
 * to prioritize discarding based on size. Should a free space that matches
 * a larger filter be returned to the free_space_cache, prioritize that discard
 * by moving @block_group to the proper filter.
 */
void btrfs_discard_check_filter(struct btrfs_block_group *block_group,
				u64 bytes)
{
	struct btrfs_discard_ctl *discard_ctl;

	if (!block_group ||
	    !btrfs_test_opt(block_group->fs_info, DISCARD_ASYNC))
		return;

	discard_ctl = &block_group->fs_info->discard_ctl;

	if (block_group->discard_index > BTRFS_DISCARD_INDEX_START &&
	    bytes >= discard_minlen[block_group->discard_index - 1]) {
		int i;

		remove_from_discard_list(discard_ctl, block_group);

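		/*
		 * Requeue the block group on the first (largest filter) list
		 * whose minimum length the freed region satisfies.
		 */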
		for (i = BTRFS_DISCARD_INDEX_START; i < BTRFS_NR_DISCARD_LISTS;
		     i++) {
			if (bytes >= discard_minlen[i]) {
				block_group->discard_index = i;
				add_to_discard_list(discard_ctl, block_group);
				break;
			}
		}
	}
}

/*
 * Move a block group along the discard lists.
 *
 * @discard_ctl: discard control
 * @block_group: block_group of interest
 *
 * Increment @block_group's discard_index. If it falls off the list, let it be.
 * Otherwise add it back to the appropriate list.
 */
static void btrfs_update_discard_index(struct btrfs_discard_ctl *discard_ctl,
				       struct btrfs_block_group *block_group)
{
	block_group->discard_index++;
	if (block_group->discard_index == BTRFS_NR_DISCARD_LISTS) {
		block_group->discard_index = 1;
		return;
	}

	add_to_discard_list(discard_ctl, block_group);
}

/*
 * Remove a block_group from the discard lists.
 *
 * @discard_ctl: discard control
 * @block_group: block_group of interest
 *
 * Remove @block_group from the discard lists. If necessary, wait on the
 * current work and then reschedule the delayed work.
 */
void btrfs_discard_cancel_work(struct btrfs_discard_ctl *discard_ctl,
			       struct btrfs_block_group *block_group)
{
	if (remove_from_discard_list(discard_ctl, block_group)) {
		cancel_delayed_work_sync(&discard_ctl->work);
		btrfs_discard_schedule_work(discard_ctl, true);
	}
}

/*
 * Handles queuing the block_groups.
 *
 * @discard_ctl: discard control
 * @block_group: block_group of interest
 *
 * Maintain the LRU order of the discard lists.
 */
void btrfs_discard_queue_work(struct btrfs_discard_ctl *discard_ctl,
			      struct btrfs_block_group *block_group)
{
	if (!block_group || !btrfs_test_opt(block_group->fs_info, DISCARD_ASYNC))
		return;

	if (block_group->used == 0)
		add_to_discard_unused_list(discard_ctl, block_group);
	else
		add_to_discard_list(discard_ctl, block_group);

	if (!delayed_work_pending(&discard_ctl->work))
		btrfs_discard_schedule_work(discard_ctl, false);
}

static void __btrfs_discard_schedule_work(struct btrfs_discard_ctl *discard_ctl,
					  u64 now, bool override)
{
	struct btrfs_block_group *block_group;

	if (!btrfs_run_discard_work(discard_ctl))
		return;
	if (!override && delayed_work_pending(&discard_ctl->work))
		return;

	block_group = find_next_block_group(discard_ctl, now);
	if (block_group) {
		u64 delay = discard_ctl->delay_ms * NSEC_PER_MSEC;
		u32 kbps_limit = READ_ONCE(discard_ctl->kbps_limit);

		/*
		 * A single delayed workqueue item is responsible for
		 * discarding, so we can manage the bytes rate limit by keeping
		 * track of the previous discard.
		 */
		if (kbps_limit && discard_ctl->prev_discard) {
			u64 bps_limit = ((u64)kbps_limit) * SZ_1K;
			u64 bps_delay = div64_u64(discard_ctl->prev_discard *
						  NSEC_PER_SEC, bps_limit);

			delay = max(delay, bps_delay);
		}

		/*
		 * This timeout is to hopefully prevent immediate discarding
		 * in a recently allocated block group.
		 */
		if (now < block_group->discard_eligible_time) {
			u64 bg_timeout = block_group->discard_eligible_time - now;

			delay = max(delay, bg_timeout);
		}

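		/*
		 * When rescheduling with @override set, credit the time that
		 * has already passed since the previous discard against the
		 * computed delay.
		 */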
		if (override && discard_ctl->prev_discard) {
			u64 elapsed = now - discard_ctl->prev_discard_time;

			if (delay > elapsed)
				delay -= elapsed;
			else
				delay = 0;
		}

		mod_delayed_work(discard_ctl->discard_workers,
				 &discard_ctl->work, nsecs_to_jiffies(delay));
	}
}

/*
 * Responsible for scheduling the discard work.
 *
 * @discard_ctl: discard control
 * @override: override the current timer
 *
 * Discards are issued by a delayed workqueue item. @override is used to
 * update the current delay as the baseline delay interval is reevaluated on
 * transaction commit. This is also maxed with any other rate limit.
 */
void btrfs_discard_schedule_work(struct btrfs_discard_ctl *discard_ctl,
				 bool override)
{
	const u64 now = ktime_get_ns();

	spin_lock(&discard_ctl->lock);
	__btrfs_discard_schedule_work(discard_ctl, now, override);
	spin_unlock(&discard_ctl->lock);
}

/*
 * Determine the next step of a block_group.
 *
 * @discard_ctl: discard control
 * @block_group: block_group of interest
 *
 * Determine the next step for a block group after it's finished going through
 * a pass on a discard list. If it is unused and fully trimmed, we can mark it
 * unused and send it to the unused_bgs path. Otherwise, pass it onto the
 * appropriate filter list or let it fall off.
 */
static void btrfs_finish_discard_pass(struct btrfs_discard_ctl *discard_ctl,
				      struct btrfs_block_group *block_group)
{
	remove_from_discard_list(discard_ctl, block_group);

	if (block_group->used == 0) {
		if (btrfs_is_free_space_trimmed(block_group))
			btrfs_mark_bg_unused(block_group);
		else
			add_to_discard_unused_list(discard_ctl, block_group);
	} else {
		btrfs_update_discard_index(discard_ctl, block_group);
	}
}

/*
 * Discard work queue callback
 *
 * @work: work
 *
 * Find the next block_group to start discarding and then discard a single
 * region. It does this in a two-pass fashion: first extents and second
 * bitmaps. Completely discarded block groups are sent to the unused_bgs path.
 */
static void btrfs_discard_workfn(struct work_struct *work)
{
	struct btrfs_discard_ctl *discard_ctl;
	struct btrfs_block_group *block_group;
	enum btrfs_discard_state discard_state;
	int discard_index = 0;
	u64 trimmed = 0;
	u64 minlen = 0;
	u64 now = ktime_get_ns();

	discard_ctl = container_of(work, struct btrfs_discard_ctl, work.work);

	block_group = peek_discard_list(discard_ctl, &discard_state,
					&discard_index, now);
	if (!block_group || !btrfs_run_discard_work(discard_ctl))
		return;
	if (now < block_group->discard_eligible_time) {
		btrfs_discard_schedule_work(discard_ctl, false);
		return;
	}

	/* Perform discarding */
	minlen = discard_minlen[discard_index];

	if (discard_state == BTRFS_DISCARD_BITMAPS) {
		u64 maxlen = 0;

		/*
		 * Use the previous level's minimum discard length as the max
		 * length filter. In the case something is added to make a
		 * region go beyond the max filter, the entire bitmap is set
		 * back to BTRFS_TRIM_STATE_UNTRIMMED.
		 */
		if (discard_index != BTRFS_DISCARD_INDEX_UNUSED)
			maxlen = discard_minlen[discard_index - 1];

		btrfs_trim_block_group_bitmaps(block_group, &trimmed,
					       block_group->discard_cursor,
					       btrfs_block_group_end(block_group),
					       minlen, maxlen, true);
		discard_ctl->discard_bitmap_bytes += trimmed;
	} else {
		btrfs_trim_block_group_extents(block_group, &trimmed,
					       block_group->discard_cursor,
					       btrfs_block_group_end(block_group),
					       minlen, true);
		discard_ctl->discard_extent_bytes += trimmed;
	}

	/* Determine next steps for a block_group */
	if (block_group->discard_cursor >= btrfs_block_group_end(block_group)) {
		if (discard_state == BTRFS_DISCARD_BITMAPS) {
			btrfs_finish_discard_pass(discard_ctl, block_group);
		} else {
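			/*
			 * The extent pass is done; restart from the beginning
			 * of the block group for the bitmap pass, unless the
			 * discard state was reset to
			 * BTRFS_DISCARD_RESET_CURSOR in the meantime.
			 */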
			block_group->discard_cursor = block_group->start;
			spin_lock(&discard_ctl->lock);
			if (block_group->discard_state !=
			    BTRFS_DISCARD_RESET_CURSOR)
				block_group->discard_state =
							BTRFS_DISCARD_BITMAPS;
			spin_unlock(&discard_ctl->lock);
		}
	}

	now = ktime_get_ns();
	spin_lock(&discard_ctl->lock);
	discard_ctl->prev_discard = trimmed;
	discard_ctl->prev_discard_time = now;
	/*
	 * If the block group was removed from the discard list while it was
	 * running in this workfn, then we didn't deref it, since this function
	 * still owned that reference. But we set the discard_ctl->block_group
	 * back to NULL, so we can use that condition to know that now we need
	 * to deref the block_group.
	 */
	if (discard_ctl->block_group == NULL)
		btrfs_put_block_group(block_group);
	discard_ctl->block_group = NULL;
	__btrfs_discard_schedule_work(discard_ctl, now, false);
	spin_unlock(&discard_ctl->lock);
}

/*
 * Recalculate the base delay.
 *
 * @discard_ctl: discard control
 *
 * Recalculate the base delay which is based on the total number of
 * discardable_extents. Clamp this between the lower_limit (iops_limit or 1ms)
 * and the upper_limit (BTRFS_DISCARD_MAX_DELAY_MSEC).
 */
void btrfs_discard_calc_delay(struct btrfs_discard_ctl *discard_ctl)
{
	s32 discardable_extents;
	s64 discardable_bytes;
	u32 iops_limit;
	unsigned long min_delay = BTRFS_DISCARD_MIN_DELAY_MSEC;
	unsigned long delay;

	discardable_extents = atomic_read(&discard_ctl->discardable_extents);
	if (!discardable_extents)
		return;

	spin_lock(&discard_ctl->lock);

	/*
	 * The following is to fix a potential -1 discrepancy that we're not
	 * sure how to reproduce. But given that this is the only place that
	 * utilizes these numbers and this is only called from
	 * btrfs_finish_extent_commit() which is synchronized, we can correct
	 * it here.
	 */
	if (discardable_extents < 0)
		atomic_add(-discardable_extents,
			   &discard_ctl->discardable_extents);

	discardable_bytes = atomic64_read(&discard_ctl->discardable_bytes);
	if (discardable_bytes < 0)
		atomic64_add(-discardable_bytes,
			     &discard_ctl->discardable_bytes);

	if (discardable_extents <= 0) {
		spin_unlock(&discard_ctl->lock);
		return;
	}

	iops_limit = READ_ONCE(discard_ctl->iops_limit);

	if (iops_limit) {
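		/*
		 * Spread the allowed iops over one second: the delay between
		 * discards is 1000ms divided by the iops limit.
		 */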
		delay = MSEC_PER_SEC / iops_limit;
	} else {
		/*
		 * Unset iops_limit means go as fast as possible, so allow a
		 * delay of 0.
		 */
		delay = 0;
		min_delay = 0;
	}

	delay = clamp(delay, min_delay, BTRFS_DISCARD_MAX_DELAY_MSEC);
	discard_ctl->delay_ms = delay;

	spin_unlock(&discard_ctl->lock);
}

/*
 * Propagate discard counters.
 *
 * @block_group: block_group of interest
 *
 * Propagate deltas of counters up to the discard_ctl. It maintains a current
 * counter and a previous counter, passing the delta up to the global stat.
 * Then the current counter value becomes the previous counter value.
 */
void btrfs_discard_update_discardable(struct btrfs_block_group *block_group)
{
	struct btrfs_free_space_ctl *ctl;
	struct btrfs_discard_ctl *discard_ctl;
	s32 extents_delta;
	s64 bytes_delta;

	if (!block_group ||
	    !btrfs_test_opt(block_group->fs_info, DISCARD_ASYNC) ||
	    !btrfs_is_block_group_data_only(block_group))
		return;

	ctl = block_group->free_space_ctl;
	discard_ctl = &block_group->fs_info->discard_ctl;

	lockdep_assert_held(&ctl->tree_lock);
	extents_delta = ctl->discardable_extents[BTRFS_STAT_CURR] -
			ctl->discardable_extents[BTRFS_STAT_PREV];
	if (extents_delta) {
		atomic_add(extents_delta, &discard_ctl->discardable_extents);
		ctl->discardable_extents[BTRFS_STAT_PREV] =
			ctl->discardable_extents[BTRFS_STAT_CURR];
	}

	bytes_delta = ctl->discardable_bytes[BTRFS_STAT_CURR] -
		      ctl->discardable_bytes[BTRFS_STAT_PREV];
	if (bytes_delta) {
		atomic64_add(bytes_delta, &discard_ctl->discardable_bytes);
		ctl->discardable_bytes[BTRFS_STAT_PREV] =
			ctl->discardable_bytes[BTRFS_STAT_CURR];
	}
}

/*
 * Punt unused_bgs list to discard lists.
 *
 * @fs_info: fs_info of interest
 *
 * The unused_bgs list needs to be punted to the discard lists because the
 * order of operations is changed. In the normal synchronous discard path, the
 * block groups are trimmed via a single large trim in transaction commit. This
 * is ultimately what we are trying to avoid with asynchronous discard. Thus,
 * it must be done before going down the unused_bgs path.
 */
void btrfs_discard_punt_unused_bgs_list(struct btrfs_fs_info *fs_info)
{
	struct btrfs_block_group *block_group, *next;

	spin_lock(&fs_info->unused_bgs_lock);
	/* We enabled async discard, so punt all to the queue */
	list_for_each_entry_safe(block_group, next, &fs_info->unused_bgs,
				 bg_list) {
		list_del_init(&block_group->bg_list);
		btrfs_discard_queue_work(&fs_info->discard_ctl, block_group);
		/*
		 * This put is for the get done by btrfs_mark_bg_unused.
		 * Queueing discard incremented it for discard's reference.
		 */
		btrfs_put_block_group(block_group);
	}
	spin_unlock(&fs_info->unused_bgs_lock);
}

/*
 * Purge discard lists.
 *
 * @discard_ctl: discard control
 *
 * If we are disabling async discard, we may have intercepted block groups that
 * are completely free and ready for the unused_bgs path. As discarding will
 * now happen in transaction commit or not at all, we can safely mark the
 * corresponding block groups as unused and they will be sent on their merry
 * way to the unused_bgs list.
 */
static void btrfs_discard_purge_list(struct btrfs_discard_ctl *discard_ctl)
{
	struct btrfs_block_group *block_group, *next;
	int i;

	spin_lock(&discard_ctl->lock);
	for (i = 0; i < BTRFS_NR_DISCARD_LISTS; i++) {
		list_for_each_entry_safe(block_group, next,
					 &discard_ctl->discard_list[i],
					 discard_list) {
			list_del_init(&block_group->discard_list);
			spin_unlock(&discard_ctl->lock);
			if (block_group->used == 0)
				btrfs_mark_bg_unused(block_group);
			spin_lock(&discard_ctl->lock);
			btrfs_put_block_group(block_group);
		}
	}
	spin_unlock(&discard_ctl->lock);
}

void btrfs_discard_resume(struct btrfs_fs_info *fs_info)
{
	if (!btrfs_test_opt(fs_info, DISCARD_ASYNC)) {
		btrfs_discard_cleanup(fs_info);
		return;
	}

	btrfs_discard_punt_unused_bgs_list(fs_info);

	set_bit(BTRFS_FS_DISCARD_RUNNING, &fs_info->flags);
}

void btrfs_discard_stop(struct btrfs_fs_info *fs_info)
{
	clear_bit(BTRFS_FS_DISCARD_RUNNING, &fs_info->flags);
}

void btrfs_discard_init(struct btrfs_fs_info *fs_info)
{
	struct btrfs_discard_ctl *discard_ctl = &fs_info->discard_ctl;
	int i;

	spin_lock_init(&discard_ctl->lock);
	INIT_DELAYED_WORK(&discard_ctl->work, btrfs_discard_workfn);

	for (i = 0; i < BTRFS_NR_DISCARD_LISTS; i++)
		INIT_LIST_HEAD(&discard_ctl->discard_list[i]);

	discard_ctl->prev_discard = 0;
	discard_ctl->prev_discard_time = 0;
	atomic_set(&discard_ctl->discardable_extents, 0);
	atomic64_set(&discard_ctl->discardable_bytes, 0);
	discard_ctl->max_discard_size = BTRFS_ASYNC_DISCARD_DEFAULT_MAX_SIZE;
	discard_ctl->delay_ms = BTRFS_DISCARD_MAX_DELAY_MSEC;
	discard_ctl->iops_limit = BTRFS_DISCARD_MAX_IOPS;
	discard_ctl->kbps_limit = 0;
	discard_ctl->discard_extent_bytes = 0;
	discard_ctl->discard_bitmap_bytes = 0;
	atomic64_set(&discard_ctl->discard_bytes_saved, 0);
}

void btrfs_discard_cleanup(struct btrfs_fs_info *fs_info)
{
	btrfs_discard_stop(fs_info);
	cancel_delayed_work_sync(&fs_info->discard_ctl.work);
	btrfs_discard_purge_list(&fs_info->discard_ctl);
}