journal.c source code [linux/fs/jbd2/journal.c]

// SPDX-License-Identifier: GPL-2.0+
/*
 * linux/fs/jbd2/journal.c
 *
 * Written by Stephen C. Tweedie <sct@redhat.com>, 1998
 *
 * Copyright 1998 Red Hat corp --- All Rights Reserved
 *
 * Generic filesystem journal-writing code; part of the ext2fs
 * journaling system.
 *
 * This file manages journals: areas of disk reserved for logging
 * transactional updates.  This includes the kernel journaling thread
 * which is responsible for scheduling updates to the log.
 *
 * We do not actually manage the physical storage of the journal in this
 * file: that is left to a per-journal policy function, which allows us
 * to store the journal within a filesystem-specified area for ext2
 * journaling (ext2 can use a reserved inode for storing the log).
 */
21
22	#include <linux/module.h>
23	#include <linux/time.h>
24	#include <linux/fs.h>
25	#include <linux/jbd2.h>
26	#include <linux/errno.h>
27	#include <linux/slab.h>
28	#include <linux/init.h>
29	#include <linux/mm.h>
30	#include <linux/freezer.h>
31	#include <linux/pagemap.h>
32	#include <linux/kthread.h>
33	#include <linux/poison.h>
34	#include <linux/proc_fs.h>
35	#include <linux/seq_file.h>
36	#include <linux/math64.h>
37	#include <linux/hash.h>
38	#include <linux/log2.h>
39	#include <linux/vmalloc.h>
40	#include <linux/backing-dev.h>
41	#include <linux/bitops.h>
42	#include <linux/ratelimit.h>
43	#include <linux/sched/mm.h>
44
45	#define CREATE_TRACE_POINTS
46	#include <trace/events/jbd2.h>
47
48	#include <linux/uaccess.h>
49	#include <asm/page.h>
50
#ifdef CONFIG_JBD2_DEBUG
/*
 * Debug verbosity threshold, exposed as the "jbd2_debug" module parameter.
 * __jbd2_debug() drops any message whose level is greater than this value.
 */
static ushort jbd2_journal_enable_debug __read_mostly;

module_param_named(jbd2_debug, jbd2_journal_enable_debug, ushort, 0644);
MODULE_PARM_DESC(jbd2_debug, "Debugging level for jbd2");
#endif
57
58	EXPORT_SYMBOL(jbd2_journal_extend);
59	EXPORT_SYMBOL(jbd2_journal_stop);
60	EXPORT_SYMBOL(jbd2_journal_lock_updates);
61	EXPORT_SYMBOL(jbd2_journal_unlock_updates);
62	EXPORT_SYMBOL(jbd2_journal_get_write_access);
63	EXPORT_SYMBOL(jbd2_journal_get_create_access);
64	EXPORT_SYMBOL(jbd2_journal_get_undo_access);
65	EXPORT_SYMBOL(jbd2_journal_set_triggers);
66	EXPORT_SYMBOL(jbd2_journal_dirty_metadata);
67	EXPORT_SYMBOL(jbd2_journal_forget);
68	EXPORT_SYMBOL(jbd2_journal_flush);
69	EXPORT_SYMBOL(jbd2_journal_revoke);
70
71	EXPORT_SYMBOL(jbd2_journal_init_dev);
72	EXPORT_SYMBOL(jbd2_journal_init_inode);
73	EXPORT_SYMBOL(jbd2_journal_check_used_features);
74	EXPORT_SYMBOL(jbd2_journal_check_available_features);
75	EXPORT_SYMBOL(jbd2_journal_set_features);
76	EXPORT_SYMBOL(jbd2_journal_load);
77	EXPORT_SYMBOL(jbd2_journal_destroy);
78	EXPORT_SYMBOL(jbd2_journal_abort);
79	EXPORT_SYMBOL(jbd2_journal_errno);
80	EXPORT_SYMBOL(jbd2_journal_ack_err);
81	EXPORT_SYMBOL(jbd2_journal_clear_err);
82	EXPORT_SYMBOL(jbd2_log_wait_commit);
83	EXPORT_SYMBOL(jbd2_journal_start_commit);
84	EXPORT_SYMBOL(jbd2_journal_force_commit_nested);
85	EXPORT_SYMBOL(jbd2_journal_wipe);
86	EXPORT_SYMBOL(jbd2_journal_blocks_per_page);
87	EXPORT_SYMBOL(jbd2_journal_invalidate_folio);
88	EXPORT_SYMBOL(jbd2_journal_try_to_free_buffers);
89	EXPORT_SYMBOL(jbd2_journal_force_commit);
90	EXPORT_SYMBOL(jbd2_journal_inode_ranged_write);
91	EXPORT_SYMBOL(jbd2_journal_inode_ranged_wait);
92	EXPORT_SYMBOL(jbd2_journal_finish_inode_data_buffers);
93	EXPORT_SYMBOL(jbd2_journal_init_jbd_inode);
94	EXPORT_SYMBOL(jbd2_journal_release_jbd_inode);
95	EXPORT_SYMBOL(jbd2_journal_begin_ordered_truncate);
96	EXPORT_SYMBOL(jbd2_inode_cache);
97
98	static int jbd2_journal_create_slab(size_t slab_size);
99
#ifdef CONFIG_JBD2_DEBUG
/*
 * Emit one jbd2 debug message at KERN_DEBUG.
 *
 * @level: verbosity of this message; it is dropped when greater than
 *	   jbd2_journal_enable_debug
 * @file, @func, @line: call-site identification printed as a prefix
 * @fmt:   printf-style format, followed by its arguments
 */
void __jbd2_debug(int level, const char *file, const char *func,
		  unsigned int line, const char *fmt, ...)
{
	struct va_format vaf;
	va_list args;

	if (level > jbd2_journal_enable_debug)
		return;
	va_start(args, fmt);
	vaf.fmt = fmt;
	vaf.va = &args;
	/* %pV expands the caller's format and argument list in place. */
	printk(KERN_DEBUG "%s: (%s, %u): %pV", file, func, line, &vaf);
	va_end(args);
}
#endif
116
117	/ Checksumming functions /
118	static __be32 jbd2_superblock_csum(journal_t j, journal_superblock_t sb)
119	{
120	__u32 csum;
121	__be32 old_csum;
122
123	old_csum = sb->s_checksum;
124	sb->s_checksum = `0`;
125	csum = jbd2_chksum(journal: j, crc: ~`0`, address: (char )sb, length: sizeof*(journal_superblock_t));
126	sb->s_checksum = old_csum;
127
128	return cpu_to_be32(csum);
129	}
130
131	/*
132	* Helper function used to manage commit timeouts
133	*/
134
135	static void commit_timeout(struct timer_list *t)
136	{
137	journal_t *journal = from_timer(journal, t, j_commit_timer);
138
139	wake_up_process(tsk: journal->j_task);
140	}
141
142	/*
143	* kjournald2: The main thread function used to manage a logging device
144	* journal.
145	*
146	* This kernel thread is responsible for two things:
147	*
148	* 1) COMMIT: Every so often we need to commit the current state of the
149	* filesystem to disk. The journal thread is responsible for writing
150	* all of the metadata buffers to disk. If a fast commit is ongoing
151	* journal thread waits until it's done and then continues from
152	* there on.
153	*
154	* 2) CHECKPOINT: We cannot reuse a used section of the log file until all
155	* of the data in that part of the log has been rewritten elsewhere on
156	* the disk. Flushing these old buffers to reclaim space in the log is
157	* known as checkpointing, and this thread is responsible for that job.
158	*/
159
160	static int kjournald2(void *arg)
161	{
162	journal_t *journal = arg;
163	transaction_t *transaction;
164
165	/*
166	* Set up an interval timer which can be used to trigger a commit wakeup
167	* after the commit interval expires
168	*/
169	timer_setup(&journal->j_commit_timer, commit_timeout, `0`);
170
171	set_freezable();
172
173	/ Record that the journal thread is running /
174	journal->j_task = current;
175	wake_up(&journal->j_wait_done_commit);
176
177	/*
178	* Make sure that no allocations from this kernel thread will ever
179	* recurse to the fs layer because we are responsible for the
180	* transaction commit and any fs involvement might get stuck waiting for
181	* the trasn. commit.
182	*/
183	memalloc_nofs_save();
184
185	/*
186	* And now, wait forever for commit wakeup events.
187	*/
188	write_lock(&journal->j_state_lock);
189
190	loop:
191	if (journal->j_flags & JBD2_UNMOUNT)
192	goto end_loop;
193
194	jbd2_debug(`1`, "commit_sequence=%u, commit_request=%u\n",
195	journal->j_commit_sequence, journal->j_commit_request);
196
197	if (journal->j_commit_sequence != journal->j_commit_request) {
198	jbd2_debug(`1`, "OK, requests differ\n");
199	write_unlock(&journal->j_state_lock);
200	del_timer_sync(timer: &journal->j_commit_timer);
201	jbd2_journal_commit_transaction(journal);
202	write_lock(&journal->j_state_lock);
203	goto loop;
204	}
205
206	wake_up(&journal->j_wait_done_commit);
207	if (freezing(current)) {
208	/*
209	* The simpler the better. Flushing journal isn't a
210	* good idea, because that depends on threads that may
211	* be already stopped.
212	*/
213	jbd2_debug(`1`, "Now suspending kjournald2\n");
214	write_unlock(&journal->j_state_lock);
215	try_to_freeze();
216	write_lock(&journal->j_state_lock);
217	} else {
218	/*
219	* We assume on resume that commits are already there,
220	* so we don't sleep
221	*/
222	DEFINE_WAIT(wait);
223	int should_sleep = `1`;
224
225	prepare_to_wait(wq_head: &journal->j_wait_commit, wq_entry: &wait,
226	TASK_INTERRUPTIBLE);
227	if (journal->j_commit_sequence != journal->j_commit_request)
228	should_sleep = `0`;
229	transaction = journal->j_running_transaction;
230	if (transaction && time_after_eq(jiffies,
231	transaction->t_expires))
232	should_sleep = `0`;
233	if (journal->j_flags & JBD2_UNMOUNT)
234	should_sleep = `0`;
235	if (should_sleep) {
236	write_unlock(&journal->j_state_lock);
237	schedule();
238	write_lock(&journal->j_state_lock);
239	}
240	finish_wait(wq_head: &journal->j_wait_commit, wq_entry: &wait);
241	}
242
243	jbd2_debug(`1`, "kjournald2 wakes\n");
244
245	/*
246	* Were we woken up by a commit wakeup event?
247	*/
248	transaction = journal->j_running_transaction;
249	if (transaction && time_after_eq(jiffies, transaction->t_expires)) {
250	journal->j_commit_request = transaction->t_tid;
251	jbd2_debug(`1`, "woke because of timeout\n");
252	}
253	goto loop;
254
255	end_loop:
256	del_timer_sync(timer: &journal->j_commit_timer);
257	journal->j_task = NULL;
258	wake_up(&journal->j_wait_done_commit);
259	jbd2_debug(`1`, "Journal thread exiting.\n");
260	write_unlock(&journal->j_state_lock);
261	return `0`;
262	}
263
264	static int jbd2_journal_start_thread(journal_t *journal)
265	{
266	struct task_struct *t;
267
268	t = kthread_run(kjournald2, journal, "jbd2/%s",
269	journal->j_devname);
270	if (IS_ERR(ptr: t))
271	return PTR_ERR(ptr: t);
272
273	wait_event(journal->j_wait_done_commit, journal->j_task != NULL);
274	return `0`;
275	}
276
277	static void journal_kill_thread(journal_t *journal)
278	{
279	write_lock(&journal->j_state_lock);
280	journal->j_flags \|= JBD2_UNMOUNT;
281
282	while (journal->j_task) {
283	write_unlock(&journal->j_state_lock);
284	wake_up(&journal->j_wait_commit);
285	wait_event(journal->j_wait_done_commit, journal->j_task == NULL);
286	write_lock(&journal->j_state_lock);
287	}
288	write_unlock(&journal->j_state_lock);
289	}
290
291	/*
292	* jbd2_journal_write_metadata_buffer: write a metadata buffer to the journal.
293	*
294	* Writes a metadata buffer to a given disk block. The actual IO is not
295	* performed but a new buffer_head is constructed which labels the data
296	* to be written with the correct destination disk block.
297	*
298	* Any magic-number escaping which needs to be done will cause a
299	* copy-out here. If the buffer happens to start with the
300	* JBD2_MAGIC_NUMBER, then we can't write it to the log directly: the
301	* magic number is only written to the log for descripter blocks. In
302	* this case, we copy the data and replace the first word with 0, and we
303	* return a result code which indicates that this buffer needs to be
304	* marked as an escaped buffer in the corresponding log descriptor
305	* block. The missing word can then be restored when the block is read
306	* during recovery.
307	*
308	* If the source buffer has already been modified by a new transaction
309	* since we took the last commit snapshot, we use the frozen copy of
310	* that data for IO. If we end up using the existing buffer_head's data
311	* for the write, then we have to make sure nobody modifies it while the
312	* IO is in progress. do_get_write_access() handles this.
313	*
314	* The function returns a pointer to the buffer_head to be used for IO.
315	*
316	*
317	* Return value:
318	* <0: Error
319	* >=0: Finished OK
320	*
321	* On success:
322	* Bit 0 set == escape performed on the data
323	* Bit 1 set == buffer copy-out performed (kfree the data after IO)
324	*/
325
326	int jbd2_journal_write_metadata_buffer(transaction_t *transaction,
327	struct journal_head *jh_in,
328	struct buffer_head **bh_out,
329	sector_t blocknr)
330	{
331	int need_copy_out = `0`;
332	int done_copy_out = `0`;
333	int do_escape = `0`;
334	char *mapped_data;
335	struct buffer_head *new_bh;
336	struct folio *new_folio;
337	unsigned int new_offset;
338	struct buffer_head *bh_in = jh2bh(jh: jh_in);
339	journal_t *journal = transaction->t_journal;
340
341	/*
342	* The buffer really shouldn't be locked: only the current committing
343	* transaction is allowed to write it, so nobody else is allowed
344	* to do any IO.
345	*
346	* akpm: except if we're journalling data, and write() output is
347	* also part of a shared mapping, and another thread has
348	* decided to launch a writepage() against this buffer.
349	*/
350	J_ASSERT_BH(bh_in, buffer_jbddirty(bh_in));
351
352	new_bh = alloc_buffer_head(GFP_NOFS\|__GFP_NOFAIL);
353
354	/ keep subsequent assertions sane /
355	atomic_set(v: &new_bh->b_count, i: `1`);
356
357	spin_lock(lock: &jh_in->b_state_lock);
358	repeat:
359	/*
360	* If a new transaction has already done a buffer copy-out, then
361	* we use that version of the data for the commit.
362	*/
363	if (jh_in->b_frozen_data) {
364	done_copy_out = `1`;
365	new_folio = virt_to_folio(x: jh_in->b_frozen_data);
366	new_offset = offset_in_folio(new_folio, jh_in->b_frozen_data);
367	} else {
368	new_folio = jh2bh(jh: jh_in)->b_folio;
369	new_offset = offset_in_folio(new_folio, jh2bh(jh_in)->b_data);
370	}
371
372	mapped_data = kmap_local_folio(folio: new_folio, offset: new_offset);
373	/*
374	* Fire data frozen trigger if data already wasn't frozen. Do this
375	* before checking for escaping, as the trigger may modify the magic
376	* offset. If a copy-out happens afterwards, it will have the correct
377	* data in the buffer.
378	*/
379	if (!done_copy_out)
380	jbd2_buffer_frozen_trigger(jh: jh_in, mapped_data,
381	triggers: jh_in->b_triggers);
382
383	/*
384	* Check for escaping
385	*/
386	if (((__be32 )mapped_data) == cpu_to_be32(JBD2_MAGIC_NUMBER)) {
387	need_copy_out = `1`;
388	do_escape = `1`;
389	}
390	kunmap_local(mapped_data);
391
392	/*
393	* Do we need to do a data copy?
394	*/
395	if (need_copy_out && !done_copy_out) {
396	char *tmp;
397
398	spin_unlock(lock: &jh_in->b_state_lock);
399	tmp = jbd2_alloc(size: bh_in->b_size, GFP_NOFS);
400	if (!tmp) {
401	brelse(bh: new_bh);
402	return -ENOMEM;
403	}
404	spin_lock(lock: &jh_in->b_state_lock);
405	if (jh_in->b_frozen_data) {
406	jbd2_free(ptr: tmp, size: bh_in->b_size);
407	goto repeat;
408	}
409
410	jh_in->b_frozen_data = tmp;
411	memcpy_from_folio(to: tmp, folio: new_folio, offset: new_offset, len: bh_in->b_size);
412
413	new_folio = virt_to_folio(x: tmp);
414	new_offset = offset_in_folio(new_folio, tmp);
415	done_copy_out = `1`;
416
417	/*
418	* This isn't strictly necessary, as we're using frozen
419	* data for the escaping, but it keeps consistency with
420	* b_frozen_data usage.
421	*/
422	jh_in->b_frozen_triggers = jh_in->b_triggers;
423	}
424
425	/*
426	* Did we need to do an escaping? Now we've done all the
427	* copying, we can finally do so.
428	*/
429	if (do_escape) {
430	mapped_data = kmap_local_folio(folio: new_folio, offset: new_offset);
431	((unsigned* int *)mapped_data) = `0`;
432	kunmap_local(mapped_data);
433	}
434
435	folio_set_bh(bh: new_bh, folio: new_folio, offset: new_offset);
436	new_bh->b_size = bh_in->b_size;
437	new_bh->b_bdev = journal->j_dev;
438	new_bh->b_blocknr = blocknr;
439	new_bh->b_private = bh_in;
440	set_buffer_mapped(new_bh);
441	set_buffer_dirty(new_bh);
442
443	*bh_out = new_bh;
444
445	/*
446	* The to-be-written buffer needs to get moved to the io queue,
447	* and the original buffer whose contents we are shadowing or
448	* copying is moved to the transaction's shadow queue.
449	*/
450	JBUFFER_TRACE(jh_in, "file as BJ_Shadow");
451	spin_lock(lock: &journal->j_list_lock);
452	__jbd2_journal_file_buffer(jh_in, transaction, BJ_Shadow);
453	spin_unlock(lock: &journal->j_list_lock);
454	set_buffer_shadow(bh_in);
455	spin_unlock(lock: &jh_in->b_state_lock);
456
457	return do_escape \| (done_copy_out << `1`);
458	}
459
460	/*
461	* Allocation code for the journal file. Manage the space left in the
462	* journal, so that we can begin checkpointing when appropriate.
463	*/
464
465	/*
466	* Called with j_state_lock locked for writing.
467	* Returns true if a transaction commit was started.
468	*/
469	static int __jbd2_log_start_commit(journal_t *journal, tid_t target)
470	{
471	/ Return if the txn has already requested to be committed /
472	if (journal->j_commit_request == target)
473	return `0`;
474
475	/*
476	* The only transaction we can possibly wait upon is the
477	* currently running transaction (if it exists). Otherwise,
478	* the target tid must be an old one.
479	*/
480	if (journal->j_running_transaction &&
481	journal->j_running_transaction->t_tid == target) {
482	/*
483	* We want a new commit: OK, mark the request and wakeup the
484	* commit thread. We do _not_ do the commit ourselves.
485	*/
486
487	journal->j_commit_request = target;
488	jbd2_debug(`1`, "JBD2: requesting commit %u/%u\n",
489	journal->j_commit_request,
490	journal->j_commit_sequence);
491	journal->j_running_transaction->t_requested = jiffies;
492	wake_up(&journal->j_wait_commit);
493	return `1`;
494	} else if (!tid_geq(x: journal->j_commit_request, y: target))
495	/ This should never happen, but if it does, preserve*
496	the evidence before kjournald goes into a loop and
497	increments j_commit_sequence beyond all recognition. /*
498	WARN_ONCE(`1`, "JBD2: bad log_start_commit: %u %u %u %u\n",
499	journal->j_commit_request,
500	journal->j_commit_sequence,
501	target, journal->j_running_transaction ?
502	journal->j_running_transaction->t_tid : `0`);
503	return `0`;
504	}
505
506	int jbd2_log_start_commit(journal_t *journal, tid_t tid)
507	{
508	int ret;
509
510	write_lock(&journal->j_state_lock);
511	ret = __jbd2_log_start_commit(journal, target: tid);
512	write_unlock(&journal->j_state_lock);
513	return ret;
514	}
515
516	/*
517	* Force and wait any uncommitted transactions. We can only force the running
518	* transaction if we don't have an active handle, otherwise, we will deadlock.
519	* Returns: <0 in case of error,
520	* 0 if nothing to commit,
521	* 1 if transaction was successfully committed.
522	*/
523	static int __jbd2_journal_force_commit(journal_t *journal)
524	{
525	transaction_t *transaction = NULL;
526	tid_t tid;
527	int need_to_start = `0`, ret = `0`;
528
529	read_lock(&journal->j_state_lock);
530	if (journal->j_running_transaction && !current->journal_info) {
531	transaction = journal->j_running_transaction;
532	if (!tid_geq(x: journal->j_commit_request, y: transaction->t_tid))
533	need_to_start = `1`;
534	} else if (journal->j_committing_transaction)
535	transaction = journal->j_committing_transaction;
536
537	if (!transaction) {
538	/ Nothing to commit /
539	read_unlock(&journal->j_state_lock);
540	return `0`;
541	}
542	tid = transaction->t_tid;
543	read_unlock(&journal->j_state_lock);
544	if (need_to_start)
545	jbd2_log_start_commit(journal, tid);
546	ret = jbd2_log_wait_commit(journal, tid);
547	if (!ret)
548	ret = `1`;
549
550	return ret;
551	}
552
553	/**
554	* jbd2_journal_force_commit_nested - Force and wait upon a commit if the
555	* calling process is not within transaction.
556	*
557	* @journal: journal to force
558	* Returns true if progress was made.
559	*
560	* This is used for forcing out undo-protected data which contains
561	* bitmaps, when the fs is running out of space.
562	*/
563	int jbd2_journal_force_commit_nested(journal_t *journal)
564	{
565	int ret;
566
567	ret = __jbd2_journal_force_commit(journal);
568	return ret > `0`;
569	}
570
571	/**
572	* jbd2_journal_force_commit() - force any uncommitted transactions
573	* @journal: journal to force
574	*
575	* Caller want unconditional commit. We can only force the running transaction
576	* if we don't have an active handle, otherwise, we will deadlock.
577	*/
578	int jbd2_journal_force_commit(journal_t *journal)
579	{
580	int ret;
581
582	J_ASSERT(!current->journal_info);
583	ret = __jbd2_journal_force_commit(journal);
584	if (ret > `0`)
585	ret = `0`;
586	return ret;
587	}
588
589	/*
590	* Start a commit of the current running transaction (if any). Returns true
591	* if a transaction is going to be committed (or is currently already
592	* committing), and fills its tid in at *ptid
593	*/
594	int jbd2_journal_start_commit(journal_t journal, tid_t ptid)
595	{
596	int ret = `0`;
597
598	write_lock(&journal->j_state_lock);
599	if (journal->j_running_transaction) {
600	tid_t tid = journal->j_running_transaction->t_tid;
601
602	__jbd2_log_start_commit(journal, target: tid);
603	/ There's a running transaction and we've just made sure*
604	* it's commit has been scheduled. */
605	if (ptid)
606	*ptid = tid;
607	ret = `1`;
608	} else if (journal->j_committing_transaction) {
609	/*
610	* If commit has been started, then we have to wait for
611	* completion of that transaction.
612	*/
613	if (ptid)
614	*ptid = journal->j_committing_transaction->t_tid;
615	ret = `1`;
616	}
617	write_unlock(&journal->j_state_lock);
618	return ret;
619	}
620
621	/*
622	* Return 1 if a given transaction has not yet sent barrier request
623	* connected with a transaction commit. If 0 is returned, transaction
624	* may or may not have sent the barrier. Used to avoid sending barrier
625	* twice in common cases.
626	*/
627	int jbd2_trans_will_send_data_barrier(journal_t *journal, tid_t tid)
628	{
629	int ret = `0`;
630	transaction_t *commit_trans;
631
632	if (!(journal->j_flags & JBD2_BARRIER))
633	return `0`;
634	read_lock(&journal->j_state_lock);
635	/ Transaction already committed? /
636	if (tid_geq(x: journal->j_commit_sequence, y: tid))
637	goto out;
638	commit_trans = journal->j_committing_transaction;
639	if (!commit_trans \|\| commit_trans->t_tid != tid) {
640	ret = `1`;
641	goto out;
642	}
643	/*
644	* Transaction is being committed and we already proceeded to
645	* submitting a flush to fs partition?
646	*/
647	if (journal->j_fs_dev != journal->j_dev) {
648	if (!commit_trans->t_need_data_flush \|\|
649	commit_trans->t_state >= T_COMMIT_DFLUSH)
650	goto out;
651	} else {
652	if (commit_trans->t_state >= T_COMMIT_JFLUSH)
653	goto out;
654	}
655	ret = `1`;
656	out:
657	read_unlock(&journal->j_state_lock);
658	return ret;
659	}
660	EXPORT_SYMBOL(jbd2_trans_will_send_data_barrier);
661
662	/*
663	* Wait for a specified commit to complete.
664	* The caller may not hold the journal lock.
665	*/
666	int jbd2_log_wait_commit(journal_t *journal, tid_t tid)
667	{
668	int err = `0`;
669
670	read_lock(&journal->j_state_lock);
671	#ifdef CONFIG_PROVE_LOCKING
672	/*
673	* Some callers make sure transaction is already committing and in that
674	* case we cannot block on open handles anymore. So don't warn in that
675	* case.
676	*/
677	if (tid_gt(x: tid, y: journal->j_commit_sequence) &&
678	(!journal->j_committing_transaction \|\|
679	journal->j_committing_transaction->t_tid != tid)) {
680	read_unlock(&journal->j_state_lock);
681	jbd2_might_wait_for_commit(journal);
682	read_lock(&journal->j_state_lock);
683	}
684	#endif
685	#ifdef CONFIG_JBD2_DEBUG
686	if (!tid_geq(x: journal->j_commit_request, y: tid)) {
687	printk(KERN_ERR
688	"%s: error: j_commit_request=%u, tid=%u\n",
689	__func__, journal->j_commit_request, tid);
690	}
691	#endif
692	while (tid_gt(x: tid, y: journal->j_commit_sequence)) {
693	jbd2_debug(`1`, "JBD2: want %u, j_commit_sequence=%u\n",
694	tid, journal->j_commit_sequence);
695	read_unlock(&journal->j_state_lock);
696	wake_up(&journal->j_wait_commit);
697	wait_event(journal->j_wait_done_commit,
698	!tid_gt(tid, journal->j_commit_sequence));
699	read_lock(&journal->j_state_lock);
700	}
701	read_unlock(&journal->j_state_lock);
702
703	if (unlikely(is_journal_aborted(journal)))
704	err = -EIO;
705	return err;
706	}
707
708	/*
709	* Start a fast commit. If there's an ongoing fast or full commit wait for
710	* it to complete. Returns 0 if a new fast commit was started. Returns -EALREADY
711	* if a fast commit is not needed, either because there's an already a commit
712	* going on or this tid has already been committed. Returns -EINVAL if no jbd2
713	* commit has yet been performed.
714	*/
715	int jbd2_fc_begin_commit(journal_t *journal, tid_t tid)
716	{
717	if (unlikely(is_journal_aborted(journal)))
718	return -EIO;
719	/*
720	* Fast commits only allowed if at least one full commit has
721	* been processed.
722	*/
723	if (!journal->j_stats.ts_tid)
724	return -EINVAL;
725
726	write_lock(&journal->j_state_lock);
727	if (tid <= journal->j_commit_sequence) {
728	write_unlock(&journal->j_state_lock);
729	return -EALREADY;
730	}
731
732	if (journal->j_flags & JBD2_FULL_COMMIT_ONGOING \|\|
733	(journal->j_flags & JBD2_FAST_COMMIT_ONGOING)) {
734	DEFINE_WAIT(wait);
735
736	prepare_to_wait(wq_head: &journal->j_fc_wait, wq_entry: &wait,
737	TASK_UNINTERRUPTIBLE);
738	write_unlock(&journal->j_state_lock);
739	schedule();
740	finish_wait(wq_head: &journal->j_fc_wait, wq_entry: &wait);
741	return -EALREADY;
742	}
743	journal->j_flags \|= JBD2_FAST_COMMIT_ONGOING;
744	write_unlock(&journal->j_state_lock);
745	jbd2_journal_lock_updates(journal);
746
747	return `0`;
748	}
749	EXPORT_SYMBOL(jbd2_fc_begin_commit);
750
751	/*
752	* Stop a fast commit. If fallback is set, this function starts commit of
753	* TID tid before any other fast commit can start.
754	*/
755	static int __jbd2_fc_end_commit(journal_t *journal, tid_t tid, bool fallback)
756	{
757	jbd2_journal_unlock_updates(journal);
758	if (journal->j_fc_cleanup_callback)
759	journal->j_fc_cleanup_callback(journal, `0`, tid);
760	write_lock(&journal->j_state_lock);
761	journal->j_flags &= ~JBD2_FAST_COMMIT_ONGOING;
762	if (fallback)
763	journal->j_flags \|= JBD2_FULL_COMMIT_ONGOING;
764	write_unlock(&journal->j_state_lock);
765	wake_up(&journal->j_fc_wait);
766	if (fallback)
767	return jbd2_complete_transaction(journal, tid);
768	return `0`;
769	}
770
771	int jbd2_fc_end_commit(journal_t *journal)
772	{
773	return __jbd2_fc_end_commit(journal, tid: `0`, fallback: false);
774	}
775	EXPORT_SYMBOL(jbd2_fc_end_commit);
776
777	int jbd2_fc_end_commit_fallback(journal_t *journal)
778	{
779	tid_t tid;
780
781	read_lock(&journal->j_state_lock);
782	tid = journal->j_running_transaction ?
783	journal->j_running_transaction->t_tid : `0`;
784	read_unlock(&journal->j_state_lock);
785	return __jbd2_fc_end_commit(journal, tid, fallback: true);
786	}
787	EXPORT_SYMBOL(jbd2_fc_end_commit_fallback);
788
789	/ Return 1 when transaction with given tid has already committed. /
790	int jbd2_transaction_committed(journal_t *journal, tid_t tid)
791	{
792	int ret = `1`;
793
794	read_lock(&journal->j_state_lock);
795	if (journal->j_running_transaction &&
796	journal->j_running_transaction->t_tid == tid)
797	ret = `0`;
798	if (journal->j_committing_transaction &&
799	journal->j_committing_transaction->t_tid == tid)
800	ret = `0`;
801	read_unlock(&journal->j_state_lock);
802	return ret;
803	}
804	EXPORT_SYMBOL(jbd2_transaction_committed);
805
806	/*
807	* When this function returns the transaction corresponding to tid
808	* will be completed. If the transaction has currently running, start
809	* committing that transaction before waiting for it to complete. If
810	* the transaction id is stale, it is by definition already completed,
811	* so just return SUCCESS.
812	*/
813	int jbd2_complete_transaction(journal_t *journal, tid_t tid)
814	{
815	int need_to_wait = `1`;
816
817	read_lock(&journal->j_state_lock);
818	if (journal->j_running_transaction &&
819	journal->j_running_transaction->t_tid == tid) {
820	if (journal->j_commit_request != tid) {
821	/ transaction not yet started, so request it /
822	read_unlock(&journal->j_state_lock);
823	jbd2_log_start_commit(journal, tid);
824	goto wait_commit;
825	}
826	} else if (!(journal->j_committing_transaction &&
827	journal->j_committing_transaction->t_tid == tid))
828	need_to_wait = `0`;
829	read_unlock(&journal->j_state_lock);
830	if (!need_to_wait)
831	return `0`;
832	wait_commit:
833	return jbd2_log_wait_commit(journal, tid);
834	}
835	EXPORT_SYMBOL(jbd2_complete_transaction);
836
837	/*
838	* Log buffer allocation routines:
839	*/
840
841	int jbd2_journal_next_log_block(journal_t journal, unsigned* long long *retp)
842	{
843	unsigned long blocknr;
844
845	write_lock(&journal->j_state_lock);
846	J_ASSERT(journal->j_free > `1`);
847
848	blocknr = journal->j_head;
849	journal->j_head++;
850	journal->j_free--;
851	if (journal->j_head == journal->j_last)
852	journal->j_head = journal->j_first;
853	write_unlock(&journal->j_state_lock);
854	return jbd2_journal_bmap(journal, blocknr, retp);
855	}
856
857	/ Map one fast commit buffer for use by the file system /
858	int jbd2_fc_get_buf(journal_t journal, struct* buffer_head **bh_out)
859	{
860	unsigned long long pblock;
861	unsigned long blocknr;
862	int ret = `0`;
863	struct buffer_head *bh;
864	int fc_off;
865
866	*bh_out = NULL;
867
868	if (journal->j_fc_off + journal->j_fc_first < journal->j_fc_last) {
869	fc_off = journal->j_fc_off;
870	blocknr = journal->j_fc_first + fc_off;
871	journal->j_fc_off++;
872	} else {
873	ret = -EINVAL;
874	}
875
876	if (ret)
877	return ret;
878
879	ret = jbd2_journal_bmap(journal, blocknr, &pblock);
880	if (ret)
881	return ret;
882
883	bh = __getblk(bdev: journal->j_dev, block: pblock, size: journal->j_blocksize);
884	if (!bh)
885	return -ENOMEM;
886
887
888	journal->j_fc_wbuf[fc_off] = bh;
889
890	*bh_out = bh;
891
892	return `0`;
893	}
894	EXPORT_SYMBOL(jbd2_fc_get_buf);
895
896	/*
897	* Wait on fast commit buffers that were allocated by jbd2_fc_get_buf
898	* for completion.
899	*/
900	int jbd2_fc_wait_bufs(journal_t journal, int* num_blks)
901	{
902	struct buffer_head *bh;
903	int i, j_fc_off;
904
905	j_fc_off = journal->j_fc_off;
906
907	/*
908	* Wait in reverse order to minimize chances of us being woken up before
909	* all IOs have completed
910	*/
911	for (i = j_fc_off - `1`; i >= j_fc_off - num_blks; i--) {
912	bh = journal->j_fc_wbuf[i];
913	wait_on_buffer(bh);
914	/*
915	* Update j_fc_off so jbd2_fc_release_bufs can release remain
916	* buffer head.
917	*/
918	if (unlikely(!buffer_uptodate(bh))) {
919	journal->j_fc_off = i + `1`;
920	return -EIO;
921	}
922	put_bh(bh);
923	journal->j_fc_wbuf[i] = NULL;
924	}
925
926	return `0`;
927	}
928	EXPORT_SYMBOL(jbd2_fc_wait_bufs);
929
930	int jbd2_fc_release_bufs(journal_t *journal)
931	{
932	struct buffer_head *bh;
933	int i, j_fc_off;
934
935	j_fc_off = journal->j_fc_off;
936
937	for (i = j_fc_off - `1`; i >= `0`; i--) {
938	bh = journal->j_fc_wbuf[i];
939	if (!bh)
940	break;
941	put_bh(bh);
942	journal->j_fc_wbuf[i] = NULL;
943	}
944
945	return `0`;
946	}
947	EXPORT_SYMBOL(jbd2_fc_release_bufs);
948
949	/*
950	* Conversion of logical to physical block numbers for the journal
951	*
952	* On external journals the journal blocks are identity-mapped, so
953	* this is a no-op. If needed, we can use j_blk_offset - everything is
954	* ready.
955	*/
956	int jbd2_journal_bmap(journal_t journal, unsigned* long blocknr,
957	unsigned long long *retp)
958	{
959	int err = `0`;
960	unsigned long long ret;
961	sector_t block = blocknr;
962
963	if (journal->j_bmap) {
964	err = journal->j_bmap(journal, &block);
965	if (err == `0`)
966	*retp = block;
967	} else if (journal->j_inode) {
968	ret = bmap(inode: journal->j_inode, block: &block);
969
970	if (ret \|\| !block) {
971	printk(KERN_ALERT "%s: journal block not found "
972	"at offset %lu on %s\n",
973	__func__, blocknr, journal->j_devname);
974	err = -EIO;
975	jbd2_journal_abort(journal, err);
976	} else {
977	*retp = block;
978	}
979
980	} else {
981	retp = blocknr; /* +journal->j_blk_offset /
982	}
983	return err;
984	}
985
986	/*
987	* We play buffer_head aliasing tricks to write data/metadata blocks to
988	* the journal without copying their contents, but for journal
989	* descriptor blocks we do need to generate bona fide buffers.
990	*
991	* After the caller of jbd2_journal_get_descriptor_buffer() has finished modifying
992	* the buffer's contents they really should run flush_dcache_page(bh->b_page).
993	* But we don't bother doing that, so there will be coherency problems with
994	* mmaps of blockdevs which hold live JBD-controlled filesystems.
995	*/
996	struct buffer_head *
997	jbd2_journal_get_descriptor_buffer(transaction_t transaction, int* type)
998	{
999	journal_t *journal = transaction->t_journal;
1000	struct buffer_head *bh;
1001	unsigned long long blocknr;
1002	journal_header_t *header;
1003	int err;
1004
1005	err = jbd2_journal_next_log_block(journal, retp: &blocknr);
1006
1007	if (err)
1008	return NULL;
1009
1010	bh = __getblk(bdev: journal->j_dev, block: blocknr, size: journal->j_blocksize);
1011	if (!bh)
1012	return NULL;
1013	atomic_dec(v: &transaction->t_outstanding_credits);
1014	lock_buffer(bh);
1015	memset(bh->b_data, `0`, journal->j_blocksize);
1016	header = (journal_header_t *)bh->b_data;
1017	header->h_magic = cpu_to_be32(JBD2_MAGIC_NUMBER);
1018	header->h_blocktype = cpu_to_be32(type);
1019	header->h_sequence = cpu_to_be32(transaction->t_tid);
1020	set_buffer_uptodate(bh);
1021	unlock_buffer(bh);
1022	BUFFER_TRACE(bh, "return this buffer");
1023	return bh;
1024	}
1025
1026	void jbd2_descriptor_block_csum_set(journal_t j, struct* buffer_head *bh)
1027	{
1028	struct jbd2_journal_block_tail *tail;
1029	__u32 csum;
1030
1031	if (!jbd2_journal_has_csum_v2or3(journal: j))
1032	return;
1033
1034	tail = (struct jbd2_journal_block_tail *)(bh->b_data + j->j_blocksize -
1035	sizeof(struct jbd2_journal_block_tail));
1036	tail->t_checksum = `0`;
1037	csum = jbd2_chksum(journal: j, crc: j->j_csum_seed, address: bh->b_data, length: j->j_blocksize);
1038	tail->t_checksum = cpu_to_be32(csum);
1039	}
1040
1041	/*
1042	* Return tid of the oldest transaction in the journal and block in the journal
1043	* where the transaction starts.
1044	*
1045	* If the journal is now empty, return which will be the next transaction ID
1046	* we will write and where will that transaction start.
1047	*
1048	* The return value is 0 if journal tail cannot be pushed any further, 1 if
1049	* it can.
1050	*/
1051	int jbd2_journal_get_log_tail(journal_t journal, tid_t tid,
1052	unsigned long *block)
1053	{
1054	transaction_t *transaction;
1055	int ret;
1056
1057	read_lock(&journal->j_state_lock);
1058	spin_lock(lock: &journal->j_list_lock);
1059	transaction = journal->j_checkpoint_transactions;
1060	if (transaction) {
1061	*tid = transaction->t_tid;
1062	*block = transaction->t_log_start;
1063	} else if ((transaction = journal->j_committing_transaction) != NULL) {
1064	*tid = transaction->t_tid;
1065	*block = transaction->t_log_start;
1066	} else if ((transaction = journal->j_running_transaction) != NULL) {
1067	*tid = transaction->t_tid;
1068	*block = journal->j_head;
1069	} else {
1070	*tid = journal->j_transaction_sequence;
1071	*block = journal->j_head;
1072	}
1073	ret = tid_gt(x: *tid, y: journal->j_tail_sequence);
1074	spin_unlock(lock: &journal->j_list_lock);
1075	read_unlock(&journal->j_state_lock);
1076
1077	return ret;
1078	}
1079
1080	/*
1081	* Update information in journal structure and in on disk journal superblock
1082	* about log tail. This function does not check whether information passed in
1083	* really pushes log tail further. It's responsibility of the caller to make
1084	* sure provided log tail information is valid (e.g. by holding
1085	* j_checkpoint_mutex all the time between computing log tail and calling this
1086	* function as is the case with jbd2_cleanup_journal_tail()).
1087	*
1088	* Requires j_checkpoint_mutex
1089	*/
1090	int __jbd2_update_log_tail(journal_t journal, tid_t tid, unsigned* long block)
1091	{
1092	unsigned long freed;
1093	int ret;
1094
1095	BUG_ON(!mutex_is_locked(&journal->j_checkpoint_mutex));
1096
1097	/*
1098	* We cannot afford for write to remain in drive's caches since as
1099	* soon as we update j_tail, next transaction can start reusing journal
1100	* space and if we lose sb update during power failure we'd replay
1101	* old transaction with possibly newly overwritten data.
1102	*/
1103	ret = jbd2_journal_update_sb_log_tail(journal, tid, block,
1104	REQ_SYNC \| REQ_FUA);
1105	if (ret)
1106	goto out;
1107
1108	write_lock(&journal->j_state_lock);
1109	freed = block - journal->j_tail;
1110	if (block < journal->j_tail)
1111	freed += journal->j_last - journal->j_first;
1112
1113	trace_jbd2_update_log_tail(journal, first_tid: tid, block_nr: block, freed);
1114	jbd2_debug(`1`,
1115	"Cleaning journal tail from %u to %u (offset %lu), "
1116	"freeing %lu\n",
1117	journal->j_tail_sequence, tid, block, freed);
1118
1119	journal->j_free += freed;
1120	journal->j_tail_sequence = tid;
1121	journal->j_tail = block;
1122	write_unlock(&journal->j_state_lock);
1123
1124	out:
1125	return ret;
1126	}
1127
1128	/*
1129	* This is a variation of __jbd2_update_log_tail which checks for validity of
1130	* provided log tail and locks j_checkpoint_mutex. So it is safe against races
1131	* with other threads updating log tail.
1132	*/
1133	void jbd2_update_log_tail(journal_t journal, tid_t tid, unsigned* long block)
1134	{
1135	mutex_lock_io(&journal->j_checkpoint_mutex);
1136	if (tid_gt(x: tid, y: journal->j_tail_sequence))
1137	__jbd2_update_log_tail(journal, tid, block);
1138	mutex_unlock(lock: &journal->j_checkpoint_mutex);
1139	}
1140
1141	struct jbd2_stats_proc_session {
1142	journal_t *journal;
1143	struct transaction_stats_s *stats;
1144	int start;
1145	int max;
1146	};
1147
1148	static void jbd2_seq_info_start(struct* seq_file seq, loff_t pos)
1149	{
1150	return *pos ? NULL : SEQ_START_TOKEN;
1151	}
1152
1153	static void jbd2_seq_info_next(struct* seq_file seq, void* v, loff_t pos)
1154	{
1155	(*pos)++;
1156	return NULL;
1157	}
1158
1159	static int jbd2_seq_info_show(struct seq_file seq, void* *v)
1160	{
1161	struct jbd2_stats_proc_session *s = seq->private;
1162
1163	if (v != SEQ_START_TOKEN)
1164	return `0`;
1165	seq_printf(m: seq, fmt: "%lu transactions (%lu requested), "
1166	"each up to %u blocks\n",
1167	s->stats->ts_tid, s->stats->ts_requested,
1168	s->journal->j_max_transaction_buffers);
1169	if (s->stats->ts_tid == `0`)
1170	return `0`;
1171	seq_printf(m: seq, fmt: "average: \n %ums waiting for transaction\n",
1172	jiffies_to_msecs(j: s->stats->run.rs_wait / s->stats->ts_tid));
1173	seq_printf(m: seq, fmt: " %ums request delay\n",
1174	(s->stats->ts_requested == `0`) ? `0` :
1175	jiffies_to_msecs(j: s->stats->run.rs_request_delay /
1176	s->stats->ts_requested));
1177	seq_printf(m: seq, fmt: " %ums running transaction\n",
1178	jiffies_to_msecs(j: s->stats->run.rs_running / s->stats->ts_tid));
1179	seq_printf(m: seq, fmt: " %ums transaction was being locked\n",
1180	jiffies_to_msecs(j: s->stats->run.rs_locked / s->stats->ts_tid));
1181	seq_printf(m: seq, fmt: " %ums flushing data (in ordered mode)\n",
1182	jiffies_to_msecs(j: s->stats->run.rs_flushing / s->stats->ts_tid));
1183	seq_printf(m: seq, fmt: " %ums logging transaction\n",
1184	jiffies_to_msecs(j: s->stats->run.rs_logging / s->stats->ts_tid));
1185	seq_printf(m: seq, fmt: " %lluus average transaction commit time\n",
1186	div_u64(dividend: s->journal->j_average_commit_time, divisor: `1000`));
1187	seq_printf(m: seq, fmt: " %lu handles per transaction\n",
1188	s->stats->run.rs_handle_count / s->stats->ts_tid);
1189	seq_printf(m: seq, fmt: " %lu blocks per transaction\n",
1190	s->stats->run.rs_blocks / s->stats->ts_tid);
1191	seq_printf(m: seq, fmt: " %lu logged blocks per transaction\n",
1192	s->stats->run.rs_blocks_logged / s->stats->ts_tid);
1193	return `0`;
1194	}
1195
/* seq_file ->stop: nothing was acquired in ->start, so nothing to undo. */
static void jbd2_seq_info_stop(struct seq_file *seq, void *v)
{
}
1199
1200	static const struct seq_operations jbd2_seq_info_ops = {
1201	.start = jbd2_seq_info_start,
1202	.next = jbd2_seq_info_next,
1203	.stop = jbd2_seq_info_stop,
1204	.show = jbd2_seq_info_show,
1205	};
1206
1207	static int jbd2_seq_info_open(struct inode inode, struct* file *file)
1208	{
1209	journal_t *journal = pde_data(inode);
1210	struct jbd2_stats_proc_session *s;
1211	int rc, size;
1212
1213	s = kmalloc(size: sizeof(*s), GFP_KERNEL);
1214	if (s == NULL)
1215	return -ENOMEM;
1216	size = sizeof(struct transaction_stats_s);
1217	s->stats = kmalloc(size, GFP_KERNEL);
1218	if (s->stats == NULL) {
1219	kfree(objp: s);
1220	return -ENOMEM;
1221	}
1222	spin_lock(lock: &journal->j_history_lock);
1223	memcpy(s->stats, &journal->j_stats, size);
1224	s->journal = journal;
1225	spin_unlock(lock: &journal->j_history_lock);
1226
1227	rc = seq_open(file, &jbd2_seq_info_ops);
1228	if (rc == `0`) {
1229	struct seq_file *m = file->private_data;
1230	m->private = s;
1231	} else {
1232	kfree(objp: s->stats);
1233	kfree(objp: s);
1234	}
1235	return rc;
1236
1237	}
1238
1239	static int jbd2_seq_info_release(struct inode inode, struct* file *file)
1240	{
1241	struct seq_file *seq = file->private_data;
1242	struct jbd2_stats_proc_session *s = seq->private;
1243	kfree(objp: s->stats);
1244	kfree(objp: s);
1245	return seq_release(inode, file);
1246	}
1247
1248	static const struct proc_ops jbd2_info_proc_ops = {
1249	.proc_open = jbd2_seq_info_open,
1250	.proc_read = seq_read,
1251	.proc_lseek = seq_lseek,
1252	.proc_release = jbd2_seq_info_release,
1253	};
1254
/* Parent proc directory under which each journal gets its own directory. */
static struct proc_dir_entry *proc_jbd2_stats;
1256
1257	static void jbd2_stats_proc_init(journal_t *journal)
1258	{
1259	journal->j_proc_entry = proc_mkdir(journal->j_devname, proc_jbd2_stats);
1260	if (journal->j_proc_entry) {
1261	proc_create_data("info", S_IRUGO, journal->j_proc_entry,
1262	&jbd2_info_proc_ops, journal);
1263	}
1264	}
1265
1266	static void jbd2_stats_proc_exit(journal_t *journal)
1267	{
1268	remove_proc_entry("info", journal->j_proc_entry);
1269	remove_proc_entry(journal->j_devname, proc_jbd2_stats);
1270	}
1271
1272	/ Minimum size of descriptor tag /
1273	static int jbd2_min_tag_size(void)
1274	{
1275	/*
1276	* Tag with 32-bit block numbers does not use last four bytes of the
1277	* structure
1278	*/
1279	return sizeof(journal_block_tag_t) - `4`;
1280	}
1281
1282	/**
1283	* jbd2_journal_shrink_scan()
1284	* @shrink: shrinker to work on
1285	* @sc: reclaim request to process
1286	*
1287	* Scan the checkpointed buffer on the checkpoint list and release the
1288	* journal_head.
1289	*/
1290	static unsigned long jbd2_journal_shrink_scan(struct shrinker *shrink,
1291	struct shrink_control *sc)
1292	{
1293	journal_t *journal = shrink->private_data;
1294	unsigned long nr_to_scan = sc->nr_to_scan;
1295	unsigned long nr_shrunk;
1296	unsigned long count;
1297
1298	count = percpu_counter_read_positive(fbc: &journal->j_checkpoint_jh_count);
1299	trace_jbd2_shrink_scan_enter(journal, nr_to_scan: sc->nr_to_scan, count);
1300
1301	nr_shrunk = jbd2_journal_shrink_checkpoint_list(journal, nr_to_scan: &nr_to_scan);
1302
1303	count = percpu_counter_read_positive(fbc: &journal->j_checkpoint_jh_count);
1304	trace_jbd2_shrink_scan_exit(journal, nr_to_scan, nr_shrunk, count);
1305
1306	return nr_shrunk;
1307	}
1308
1309	/**
1310	* jbd2_journal_shrink_count()
1311	* @shrink: shrinker to work on
1312	* @sc: reclaim request to process
1313	*
1314	* Count the number of checkpoint buffers on the checkpoint list.
1315	*/
1316	static unsigned long jbd2_journal_shrink_count(struct shrinker *shrink,
1317	struct shrink_control *sc)
1318	{
1319	journal_t *journal = shrink->private_data;
1320	unsigned long count;
1321
1322	count = percpu_counter_read_positive(fbc: &journal->j_checkpoint_jh_count);
1323	trace_jbd2_shrink_count(journal, nr_to_scan: sc->nr_to_scan, count);
1324
1325	return count;
1326	}
1327
1328	/*
1329	* If the journal init or create aborts, we need to mark the journal
1330	* superblock as being NULL to prevent the journal destroy from writing
1331	* back a bogus superblock.
1332	*/
1333	static void journal_fail_superblock(journal_t *journal)
1334	{
1335	struct buffer_head *bh = journal->j_sb_buffer;
1336	brelse(bh);
1337	journal->j_sb_buffer = NULL;
1338	}
1339
1340	/*
1341	* Check the superblock for a given journal, performing initial
1342	* validation of the format.
1343	*/
1344	static int journal_check_superblock(journal_t *journal)
1345	{
1346	journal_superblock_t *sb = journal->j_superblock;
1347	int num_fc_blks;
1348	int err = -EINVAL;
1349
1350	if (sb->s_header.h_magic != cpu_to_be32(JBD2_MAGIC_NUMBER) \|\|
1351	sb->s_blocksize != cpu_to_be32(journal->j_blocksize)) {
1352	printk(KERN_WARNING "JBD2: no valid journal superblock found\n");
1353	return err;
1354	}
1355
1356	if (be32_to_cpu(sb->s_header.h_blocktype) != JBD2_SUPERBLOCK_V1 &&
1357	be32_to_cpu(sb->s_header.h_blocktype) != JBD2_SUPERBLOCK_V2) {
1358	printk(KERN_WARNING "JBD2: unrecognised superblock format ID\n");
1359	return err;
1360	}
1361
1362	if (be32_to_cpu(sb->s_maxlen) > journal->j_total_len) {
1363	printk(KERN_WARNING "JBD2: journal file too short\n");
1364	return err;
1365	}
1366
1367	if (be32_to_cpu(sb->s_first) == `0` \|\|
1368	be32_to_cpu(sb->s_first) >= journal->j_total_len) {
1369	printk(KERN_WARNING
1370	"JBD2: Invalid start block of journal: %u\n",
1371	be32_to_cpu(sb->s_first));
1372	return err;
1373	}
1374
1375	/*
1376	* If this is a V2 superblock, then we have to check the
1377	* features flags on it.
1378	*/
1379	if (!jbd2_format_support_feature(j: journal))
1380	return `0`;
1381
1382	if ((sb->s_feature_ro_compat &
1383	~cpu_to_be32(JBD2_KNOWN_ROCOMPAT_FEATURES)) \|\|
1384	(sb->s_feature_incompat &
1385	~cpu_to_be32(JBD2_KNOWN_INCOMPAT_FEATURES))) {
1386	printk(KERN_WARNING "JBD2: Unrecognised features on journal\n");
1387	return err;
1388	}
1389
1390	num_fc_blks = jbd2_has_feature_fast_commit(j: journal) ?
1391	jbd2_journal_get_num_fc_blks(jsb: sb) : `0`;
1392	if (be32_to_cpu(sb->s_maxlen) < JBD2_MIN_JOURNAL_BLOCKS \|\|
1393	be32_to_cpu(sb->s_maxlen) - JBD2_MIN_JOURNAL_BLOCKS < num_fc_blks) {
1394	printk(KERN_ERR "JBD2: journal file too short %u,%d\n",
1395	be32_to_cpu(sb->s_maxlen), num_fc_blks);
1396	return err;
1397	}
1398
1399	if (jbd2_has_feature_csum2(j: journal) &&
1400	jbd2_has_feature_csum3(j: journal)) {
1401	/ Can't have checksum v2 and v3 at the same time! /
1402	printk(KERN_ERR "JBD2: Can't enable checksumming v2 and v3 "
1403	"at the same time!\n");
1404	return err;
1405	}
1406
1407	if (jbd2_journal_has_csum_v2or3_feature(j: journal) &&
1408	jbd2_has_feature_checksum(j: journal)) {
1409	/ Can't have checksum v1 and v2 on at the same time! /
1410	printk(KERN_ERR "JBD2: Can't enable checksumming v1 and v2/3 "
1411	"at the same time!\n");
1412	return err;
1413	}
1414
1415	/ Load the checksum driver /
1416	if (jbd2_journal_has_csum_v2or3_feature(j: journal)) {
1417	if (sb->s_checksum_type != JBD2_CRC32C_CHKSUM) {
1418	printk(KERN_ERR "JBD2: Unknown checksum type\n");
1419	return err;
1420	}
1421
1422	journal->j_chksum_driver = crypto_alloc_shash(alg_name: "crc32c", type: `0`, mask: `0`);
1423	if (IS_ERR(ptr: journal->j_chksum_driver)) {
1424	printk(KERN_ERR "JBD2: Cannot load crc32c driver.\n");
1425	err = PTR_ERR(ptr: journal->j_chksum_driver);
1426	journal->j_chksum_driver = NULL;
1427	return err;
1428	}
1429	/ Check superblock checksum /
1430	if (sb->s_checksum != jbd2_superblock_csum(j: journal, sb)) {
1431	printk(KERN_ERR "JBD2: journal checksum error\n");
1432	err = -EFSBADCRC;
1433	return err;
1434	}
1435	}
1436
1437	return `0`;
1438	}
1439
1440	static int journal_revoke_records_per_block(journal_t *journal)
1441	{
1442	int record_size;
1443	int space = journal->j_blocksize - sizeof(jbd2_journal_revoke_header_t);
1444
1445	if (jbd2_has_feature_64bit(j: journal))
1446	record_size = `8`;
1447	else
1448	record_size = `4`;
1449
1450	if (jbd2_journal_has_csum_v2or3(journal))
1451	space -= sizeof(struct jbd2_journal_block_tail);
1452	return space / record_size;
1453	}
1454
1455	/*
1456	* Load the on-disk journal superblock and read the key fields into the
1457	* journal_t.
1458	*/
1459	static int journal_load_superblock(journal_t *journal)
1460	{
1461	int err;
1462	struct buffer_head *bh;
1463	journal_superblock_t *sb;
1464
1465	bh = getblk_unmovable(bdev: journal->j_dev, block: journal->j_blk_offset,
1466	size: journal->j_blocksize);
1467	if (bh)
1468	err = bh_read(bh, op_flags: `0`);
1469	if (!bh \|\| err < `0`) {
1470	pr_err("%s: Cannot read journal superblock\n", __func__);
1471	brelse(bh);
1472	return -EIO;
1473	}
1474
1475	journal->j_sb_buffer = bh;
1476	sb = (journal_superblock_t *)bh->b_data;
1477	journal->j_superblock = sb;
1478	err = journal_check_superblock(journal);
1479	if (err) {
1480	journal_fail_superblock(journal);
1481	return err;
1482	}
1483
1484	journal->j_tail_sequence = be32_to_cpu(sb->s_sequence);
1485	journal->j_tail = be32_to_cpu(sb->s_start);
1486	journal->j_first = be32_to_cpu(sb->s_first);
1487	journal->j_errno = be32_to_cpu(sb->s_errno);
1488	journal->j_last = be32_to_cpu(sb->s_maxlen);
1489
1490	if (be32_to_cpu(sb->s_maxlen) < journal->j_total_len)
1491	journal->j_total_len = be32_to_cpu(sb->s_maxlen);
1492	/ Precompute checksum seed for all metadata /
1493	if (jbd2_journal_has_csum_v2or3(journal))
1494	journal->j_csum_seed = jbd2_chksum(journal, crc: ~`0`, address: sb->s_uuid,
1495	length: sizeof(sb->s_uuid));
1496	journal->j_revoke_records_per_block =
1497	journal_revoke_records_per_block(journal);
1498
1499	if (jbd2_has_feature_fast_commit(j: journal)) {
1500	journal->j_fc_last = be32_to_cpu(sb->s_maxlen);
1501	journal->j_last = journal->j_fc_last -
1502	jbd2_journal_get_num_fc_blks(jsb: sb);
1503	journal->j_fc_first = journal->j_last + `1`;
1504	journal->j_fc_off = `0`;
1505	}
1506
1507	return `0`;
1508	}
1509
1510
1511	/*
1512	* Management for journal control blocks: functions to create and
1513	* destroy journal_t structures, and to initialise and read existing
1514	* journal blocks from disk. */
1515
1516	/ First: create and setup a journal_t object in memory. We initialise*
1517	* very few fields yet: that has to wait until we have created the
1518	* journal structures from from scratch, or loaded them from disk. */
1519
1520	static journal_t journal_init_common(struct* block_device *bdev,
1521	struct block_device *fs_dev,
1522	unsigned long long start, int len, int blocksize)
1523	{
1524	static struct lock_class_key jbd2_trans_commit_key;
1525	journal_t *journal;
1526	int err;
1527	int n;
1528
1529	journal = kzalloc(size: sizeof(*journal), GFP_KERNEL);
1530	if (!journal)
1531	return ERR_PTR(error: -ENOMEM);
1532
1533	journal->j_blocksize = blocksize;
1534	journal->j_dev = bdev;
1535	journal->j_fs_dev = fs_dev;
1536	journal->j_blk_offset = start;
1537	journal->j_total_len = len;
1538
1539	err = journal_load_superblock(journal);
1540	if (err)
1541	goto err_cleanup;
1542
1543	init_waitqueue_head(&journal->j_wait_transaction_locked);
1544	init_waitqueue_head(&journal->j_wait_done_commit);
1545	init_waitqueue_head(&journal->j_wait_commit);
1546	init_waitqueue_head(&journal->j_wait_updates);
1547	init_waitqueue_head(&journal->j_wait_reserved);
1548	init_waitqueue_head(&journal->j_fc_wait);
1549	mutex_init(&journal->j_abort_mutex);
1550	mutex_init(&journal->j_barrier);
1551	mutex_init(&journal->j_checkpoint_mutex);
1552	spin_lock_init(&journal->j_revoke_lock);
1553	spin_lock_init(&journal->j_list_lock);
1554	spin_lock_init(&journal->j_history_lock);
1555	rwlock_init(&journal->j_state_lock);
1556
1557	journal->j_commit_interval = (HZ * JBD2_DEFAULT_MAX_COMMIT_AGE);
1558	journal->j_min_batch_time = `0`;
1559	journal->j_max_batch_time = `15000`; / 15ms /
1560	atomic_set(v: &journal->j_reserved_credits, i: `0`);
1561	lockdep_init_map(lock: &journal->j_trans_commit_map, name: "jbd2_handle",
1562	key: &jbd2_trans_commit_key, subclass: `0`);
1563
1564	/ The journal is marked for error until we succeed with recovery! /
1565	journal->j_flags = JBD2_ABORT;
1566
1567	/ Set up a default-sized revoke table for the new mount. /
1568	err = jbd2_journal_init_revoke(journal, JOURNAL_REVOKE_DEFAULT_HASH);
1569	if (err)
1570	goto err_cleanup;
1571
1572	/*
1573	* journal descriptor can store up to n blocks, we need enough
1574	* buffers to write out full descriptor block.
1575	*/
1576	err = -ENOMEM;
1577	n = journal->j_blocksize / jbd2_min_tag_size();
1578	journal->j_wbufsize = n;
1579	journal->j_fc_wbuf = NULL;
1580	journal->j_wbuf = kmalloc_array(n, size: sizeof(struct buffer_head *),
1581	GFP_KERNEL);
1582	if (!journal->j_wbuf)
1583	goto err_cleanup;
1584
1585	err = percpu_counter_init(&journal->j_checkpoint_jh_count, `0`,
1586	GFP_KERNEL);
1587	if (err)
1588	goto err_cleanup;
1589
1590	journal->j_shrink_transaction = NULL;
1591
1592	journal->j_shrinker = shrinker_alloc(flags: `0`, fmt: "jbd2-journal:(%u:%u)",
1593	MAJOR(bdev->bd_dev),
1594	MINOR(bdev->bd_dev));
1595	if (!journal->j_shrinker) {
1596	err = -ENOMEM;
1597	goto err_cleanup;
1598	}
1599
1600	journal->j_shrinker->scan_objects = jbd2_journal_shrink_scan;
1601	journal->j_shrinker->count_objects = jbd2_journal_shrink_count;
1602	journal->j_shrinker->batch = journal->j_max_transaction_buffers;
1603	journal->j_shrinker->private_data = journal;
1604
1605	shrinker_register(shrinker: journal->j_shrinker);
1606
1607	return journal;
1608
1609	err_cleanup:
1610	percpu_counter_destroy(fbc: &journal->j_checkpoint_jh_count);
1611	if (journal->j_chksum_driver)
1612	crypto_free_shash(tfm: journal->j_chksum_driver);
1613	kfree(objp: journal->j_wbuf);
1614	jbd2_journal_destroy_revoke(journal);
1615	journal_fail_superblock(journal);
1616	kfree(objp: journal);
1617	return ERR_PTR(error: err);
1618	}
1619
/*
 * jbd2_journal_init_dev and jbd2_journal_init_inode:
 *
 * Create a journal structure assigned some fixed set of disk blocks to
 * the journal.  Apart from reading and validating the journal
 * superblock, we don't actually touch those disk blocks yet, but we
 * need to set up all of the mapping information to tell the journaling
 * system where the journal blocks are.
 */
1628
1629	/**
1630	* journal_t * jbd2_journal_init_dev() - creates and initialises a journal structure
1631	* @bdev: Block device on which to create the journal
1632	* @fs_dev: Device which hold journalled filesystem for this journal.
1633	* @start: Block nr Start of journal.
1634	* @len: Length of the journal in blocks.
1635	* @blocksize: blocksize of journalling device
1636	*
1637	* Returns: a newly created journal_t *
1638	*
1639	* jbd2_journal_init_dev creates a journal which maps a fixed contiguous
1640	* range of blocks on an arbitrary block device.
1641	*
1642	*/
1643	journal_t jbd2_journal_init_dev(struct* block_device *bdev,
1644	struct block_device *fs_dev,
1645	unsigned long long start, int len, int blocksize)
1646	{
1647	journal_t *journal;
1648
1649	journal = journal_init_common(bdev, fs_dev, start, len, blocksize);
1650	if (IS_ERR(ptr: journal))
1651	return ERR_CAST(ptr: journal);
1652
1653	snprintf(buf: journal->j_devname, size: sizeof(journal->j_devname),
1654	fmt: "%pg", journal->j_dev);
1655	strreplace(str: journal->j_devname, old: `'/'`, new: `'!'`);
1656	jbd2_stats_proc_init(journal);
1657
1658	return journal;
1659	}
1660
1661	/**
1662	* journal_t * jbd2_journal_init_inode () - creates a journal which maps to a inode.
1663	* @inode: An inode to create the journal in
1664	*
1665	* jbd2_journal_init_inode creates a journal which maps an on-disk inode as
1666	* the journal. The inode must exist already, must support bmap() and
1667	* must have all data blocks preallocated.
1668	*/
1669	journal_t jbd2_journal_init_inode(struct* inode *inode)
1670	{
1671	journal_t *journal;
1672	sector_t blocknr;
1673	int err = `0`;
1674
1675	blocknr = `0`;
1676	err = bmap(inode, block: &blocknr);
1677	if (err \|\| !blocknr) {
1678	pr_err("%s: Cannot locate journal superblock\n", __func__);
1679	return err ? ERR_PTR(error: err) : ERR_PTR(error: -EINVAL);
1680	}
1681
1682	jbd2_debug(`1`, "JBD2: inode %s/%ld, size %lld, bits %d, blksize %ld\n",
1683	inode->i_sb->s_id, inode->i_ino, (long long) inode->i_size,
1684	inode->i_sb->s_blocksize_bits, inode->i_sb->s_blocksize);
1685
1686	journal = journal_init_common(bdev: inode->i_sb->s_bdev, fs_dev: inode->i_sb->s_bdev,
1687	start: blocknr, len: inode->i_size >> inode->i_sb->s_blocksize_bits,
1688	blocksize: inode->i_sb->s_blocksize);
1689	if (IS_ERR(ptr: journal))
1690	return ERR_CAST(ptr: journal);
1691
1692	journal->j_inode = inode;
1693	snprintf(buf: journal->j_devname, size: sizeof(journal->j_devname),
1694	fmt: "%pg-%lu", journal->j_dev, journal->j_inode->i_ino);
1695	strreplace(str: journal->j_devname, old: `'/'`, new: `'!'`);
1696	jbd2_stats_proc_init(journal);
1697
1698	return journal;
1699	}
1700
1701	/*
1702	* Given a journal_t structure, initialise the various fields for
1703	* startup of a new journaling session. We use this both when creating
1704	* a journal, and after recovering an old journal to reset it for
1705	* subsequent use.
1706	*/
1707
1708	static int journal_reset(journal_t *journal)
1709	{
1710	journal_superblock_t *sb = journal->j_superblock;
1711	unsigned long long first, last;
1712
1713	first = be32_to_cpu(sb->s_first);
1714	last = be32_to_cpu(sb->s_maxlen);
1715	if (first + JBD2_MIN_JOURNAL_BLOCKS > last + `1`) {
1716	printk(KERN_ERR "JBD2: Journal too short (blocks %llu-%llu).\n",
1717	first, last);
1718	journal_fail_superblock(journal);
1719	return -EINVAL;
1720	}
1721
1722	journal->j_first = first;
1723	journal->j_last = last;
1724
1725	if (journal->j_head != `0` && journal->j_flags & JBD2_CYCLE_RECORD) {
1726	/*
1727	* Disable the cycled recording mode if the journal head block
1728	* number is not correct.
1729	*/
1730	if (journal->j_head < first \|\| journal->j_head >= last) {
1731	printk(KERN_WARNING "JBD2: Incorrect Journal head block %lu, "
1732	"disable journal_cycle_record\n",
1733	journal->j_head);
1734	journal->j_head = journal->j_first;
1735	}
1736	} else {
1737	journal->j_head = journal->j_first;
1738	}
1739	journal->j_tail = journal->j_head;
1740	journal->j_free = journal->j_last - journal->j_first;
1741
1742	journal->j_tail_sequence = journal->j_transaction_sequence;
1743	journal->j_commit_sequence = journal->j_transaction_sequence - `1`;
1744	journal->j_commit_request = journal->j_commit_sequence;
1745
1746	journal->j_max_transaction_buffers = jbd2_journal_get_max_txn_bufs(journal);
1747
1748	/*
1749	* Now that journal recovery is done, turn fast commits off here. This
1750	* way, if fast commit was enabled before the crash but if now FS has
1751	* disabled it, we don't enable fast commits.
1752	*/
1753	jbd2_clear_feature_fast_commit(j: journal);
1754
1755	/*
1756	* As a special case, if the on-disk copy is already marked as needing
1757	* no recovery (s_start == 0), then we can safely defer the superblock
1758	* update until the next commit by setting JBD2_FLUSHED. This avoids
1759	* attempting a write to a potential-readonly device.
1760	*/
1761	if (sb->s_start == `0`) {
1762	jbd2_debug(`1`, "JBD2: Skipping superblock update on recovered sb "
1763	"(start %ld, seq %u, errno %d)\n",
1764	journal->j_tail, journal->j_tail_sequence,
1765	journal->j_errno);
1766	journal->j_flags \|= JBD2_FLUSHED;
1767	} else {
1768	/ Lock here to make assertions happy... /
1769	mutex_lock_io(&journal->j_checkpoint_mutex);
1770	/*
1771	* Update log tail information. We use REQ_FUA since new
1772	* transaction will start reusing journal space and so we
1773	* must make sure information about current log tail is on
1774	* disk before that.
1775	*/
1776	jbd2_journal_update_sb_log_tail(journal,
1777	journal->j_tail_sequence,
1778	journal->j_tail,
1779	REQ_SYNC \| REQ_FUA);
1780	mutex_unlock(lock: &journal->j_checkpoint_mutex);
1781	}
1782	return jbd2_journal_start_thread(journal);
1783	}
1784
1785	/*
1786	* This function expects that the caller will have locked the journal
1787	* buffer head, and will return with it unlocked
1788	*/
1789	static int jbd2_write_superblock(journal_t *journal, blk_opf_t write_flags)
1790	{
1791	struct buffer_head *bh = journal->j_sb_buffer;
1792	journal_superblock_t *sb = journal->j_superblock;
1793	int ret = `0`;
1794
1795	/ Buffer got discarded which means block device got invalidated /
1796	if (!buffer_mapped(bh)) {
1797	unlock_buffer(bh);
1798	return -EIO;
1799	}
1800
1801	trace_jbd2_write_superblock(journal, write_flags);
1802	if (!(journal->j_flags & JBD2_BARRIER))
1803	write_flags &= ~(REQ_FUA \| REQ_PREFLUSH);
1804	if (buffer_write_io_error(bh)) {
1805	/*
1806	* Oh, dear. A previous attempt to write the journal
1807	* superblock failed. This could happen because the
1808	* USB device was yanked out. Or it could happen to
1809	* be a transient write error and maybe the block will
1810	* be remapped. Nothing we can do but to retry the
1811	* write and hope for the best.
1812	*/
1813	printk(KERN_ERR "JBD2: previous I/O error detected "
1814	"for journal superblock update for %s.\n",
1815	journal->j_devname);
1816	clear_buffer_write_io_error(bh);
1817	set_buffer_uptodate(bh);
1818	}
1819	if (jbd2_journal_has_csum_v2or3(journal))
1820	sb->s_checksum = jbd2_superblock_csum(j: journal, sb);
1821	get_bh(bh);
1822	bh->b_end_io = end_buffer_write_sync;
1823	submit_bh(REQ_OP_WRITE \| write_flags, bh);
1824	wait_on_buffer(bh);
1825	if (buffer_write_io_error(bh)) {
1826	clear_buffer_write_io_error(bh);
1827	set_buffer_uptodate(bh);
1828	ret = -EIO;
1829	}
1830	if (ret) {
1831	printk(KERN_ERR "JBD2: I/O error when updating journal superblock for %s.\n",
1832	journal->j_devname);
1833	if (!is_journal_aborted(journal))
1834	jbd2_journal_abort(journal, ret);
1835	}
1836
1837	return ret;
1838	}
1839
1840	/**
1841	* jbd2_journal_update_sb_log_tail() - Update log tail in journal sb on disk.
1842	* @journal: The journal to update.
1843	* @tail_tid: TID of the new transaction at the tail of the log
1844	* @tail_block: The first block of the transaction at the tail of the log
1845	* @write_flags: Flags for the journal sb write operation
1846	*
1847	* Update a journal's superblock information about log tail and write it to
1848	* disk, waiting for the IO to complete.
1849	*/
1850	int jbd2_journal_update_sb_log_tail(journal_t *journal, tid_t tail_tid,
1851	unsigned long tail_block,
1852	blk_opf_t write_flags)
1853	{
1854	journal_superblock_t *sb = journal->j_superblock;
1855	int ret;
1856
1857	if (is_journal_aborted(journal))
1858	return -EIO;
1859	if (test_bit(JBD2_CHECKPOINT_IO_ERROR, &journal->j_atomic_flags)) {
1860	jbd2_journal_abort(journal, -EIO);
1861	return -EIO;
1862	}
1863
1864	BUG_ON(!mutex_is_locked(&journal->j_checkpoint_mutex));
1865	jbd2_debug(`1`, "JBD2: updating superblock (start %lu, seq %u)\n",
1866	tail_block, tail_tid);
1867
1868	lock_buffer(bh: journal->j_sb_buffer);
1869	sb->s_sequence = cpu_to_be32(tail_tid);
1870	sb->s_start = cpu_to_be32(tail_block);
1871
1872	ret = jbd2_write_superblock(journal, write_flags);
1873	if (ret)
1874	goto out;
1875
1876	/ Log is no longer empty /
1877	write_lock(&journal->j_state_lock);
1878	WARN_ON(!sb->s_sequence);
1879	journal->j_flags &= ~JBD2_FLUSHED;
1880	write_unlock(&journal->j_state_lock);
1881
1882	out:
1883	return ret;
1884	}
1885
1886	/**
1887	* jbd2_mark_journal_empty() - Mark on disk journal as empty.
1888	* @journal: The journal to update.
1889	* @write_flags: Flags for the journal sb write operation
1890	*
1891	* Update a journal's dynamic superblock fields to show that journal is empty.
1892	* Write updated superblock to disk waiting for IO to complete.
1893	*/
1894	static void jbd2_mark_journal_empty(journal_t *journal, blk_opf_t write_flags)
1895	{
1896	journal_superblock_t *sb = journal->j_superblock;
1897	bool had_fast_commit = false;
1898
1899	BUG_ON(!mutex_is_locked(&journal->j_checkpoint_mutex));
1900	lock_buffer(bh: journal->j_sb_buffer);
1901	if (sb->s_start == `0`) { / Is it already empty? /
1902	unlock_buffer(bh: journal->j_sb_buffer);
1903	return;
1904	}
1905
1906	jbd2_debug(`1`, "JBD2: Marking journal as empty (seq %u)\n",
1907	journal->j_tail_sequence);
1908
1909	sb->s_sequence = cpu_to_be32(journal->j_tail_sequence);
1910	sb->s_start = cpu_to_be32(`0`);
1911	sb->s_head = cpu_to_be32(journal->j_head);
1912	if (jbd2_has_feature_fast_commit(j: journal)) {
1913	/*
1914	* When journal is clean, no need to commit fast commit flag and
1915	* make file system incompatible with older kernels.
1916	*/
1917	jbd2_clear_feature_fast_commit(j: journal);
1918	had_fast_commit = true;
1919	}
1920
1921	jbd2_write_superblock(journal, write_flags);
1922
1923	if (had_fast_commit)
1924	jbd2_set_feature_fast_commit(j: journal);
1925
1926	/ Log is no longer empty /
1927	write_lock(&journal->j_state_lock);
1928	journal->j_flags \|= JBD2_FLUSHED;
1929	write_unlock(&journal->j_state_lock);
1930	}
1931
1932	/**
1933	* __jbd2_journal_erase() - Discard or zeroout journal blocks (excluding superblock)
1934	* @journal: The journal to erase.
 * @flags: A discard/zeroout request is sent for each physically contiguous
1936	* region of the journal. Either JBD2_JOURNAL_FLUSH_DISCARD or
1937	* JBD2_JOURNAL_FLUSH_ZEROOUT must be set to determine which operation
1938	* to perform.
1939	*
1940	* Note: JBD2_JOURNAL_FLUSH_ZEROOUT attempts to use hardware offload. Zeroes
1941	* will be explicitly written if no hardware offload is available, see
1942	* blkdev_issue_zeroout for more details.
1943	*/
1944	static int __jbd2_journal_erase(journal_t journal, unsigned* int flags)
1945	{
1946	int err = `0`;
1947	unsigned long block, log_offset; / logical /
1948	unsigned long long phys_block, block_start, block_stop; / physical /
1949	loff_t byte_start, byte_stop, byte_count;
1950
1951	/ flags must be set to either discard or zeroout /
1952	if ((flags & ~JBD2_JOURNAL_FLUSH_VALID) \|\| !flags \|\|
1953	((flags & JBD2_JOURNAL_FLUSH_DISCARD) &&
1954	(flags & JBD2_JOURNAL_FLUSH_ZEROOUT)))
1955	return -EINVAL;
1956
1957	if ((flags & JBD2_JOURNAL_FLUSH_DISCARD) &&
1958	!bdev_max_discard_sectors(bdev: journal->j_dev))
1959	return -EOPNOTSUPP;
1960
1961	/*
1962	* lookup block mapping and issue discard/zeroout for each
1963	* contiguous region
1964	*/
1965	log_offset = be32_to_cpu(journal->j_superblock->s_first);
1966	block_start = ~`0ULL`;
1967	for (block = log_offset; block < journal->j_total_len; block++) {
1968	err = jbd2_journal_bmap(journal, blocknr: block, retp: &phys_block);
1969	if (err) {
1970	pr_err("JBD2: bad block at offset %lu", block);
1971	return err;
1972	}
1973
1974	if (block_start == ~`0ULL`) {
1975	block_start = phys_block;
1976	block_stop = block_start - `1`;
1977	}
1978
1979	/*
1980	* last block not contiguous with current block,
1981	* process last contiguous region and return to this block on
1982	* next loop
1983	*/
1984	if (phys_block != block_stop + `1`) {
1985	block--;
1986	} else {
1987	block_stop++;
1988	/*
1989	* if this isn't the last block of journal,
1990	* no need to process now because next block may also
1991	* be part of this contiguous region
1992	*/
1993	if (block != journal->j_total_len - `1`)
1994	continue;
1995	}
1996
1997	/*
1998	* end of contiguous region or this is last block of journal,
1999	* take care of the region
2000	*/
2001	byte_start = block_start * journal->j_blocksize;
2002	byte_stop = block_stop * journal->j_blocksize;
2003	byte_count = (block_stop - block_start + `1`) *
2004	journal->j_blocksize;
2005
2006	truncate_inode_pages_range(journal->j_dev->bd_inode->i_mapping,
2007	lstart: byte_start, lend: byte_stop);
2008
2009	if (flags & JBD2_JOURNAL_FLUSH_DISCARD) {
2010	err = blkdev_issue_discard(bdev: journal->j_dev,
2011	sector: byte_start >> SECTOR_SHIFT,
2012	nr_sects: byte_count >> SECTOR_SHIFT,
2013	GFP_NOFS);
2014	} else if (flags & JBD2_JOURNAL_FLUSH_ZEROOUT) {
2015	err = blkdev_issue_zeroout(bdev: journal->j_dev,
2016	sector: byte_start >> SECTOR_SHIFT,
2017	nr_sects: byte_count >> SECTOR_SHIFT,
2018	GFP_NOFS, flags: `0`);
2019	}
2020
2021	if (unlikely(err != `0`)) {
2022	pr_err("JBD2: (error %d) unable to wipe journal at physical blocks %llu - %llu",
2023	err, block_start, block_stop);
2024	return err;
2025	}
2026
2027	/ reset start and stop after processing a region /
2028	block_start = ~`0ULL`;
2029	}
2030
2031	return blkdev_issue_flush(bdev: journal->j_dev);
2032	}
2033
2034	/**
2035	* jbd2_journal_update_sb_errno() - Update error in the journal.
2036	* @journal: The journal to update.
2037	*
2038	* Update a journal's errno. Write updated superblock to disk waiting for IO
2039	* to complete.
2040	*/
2041	void jbd2_journal_update_sb_errno(journal_t *journal)
2042	{
2043	journal_superblock_t *sb = journal->j_superblock;
2044	int errcode;
2045
2046	lock_buffer(bh: journal->j_sb_buffer);
2047	errcode = journal->j_errno;
2048	if (errcode == -ESHUTDOWN)
2049	errcode = `0`;
2050	jbd2_debug(`1`, "JBD2: updating superblock error (errno %d)\n", errcode);
2051	sb->s_errno = cpu_to_be32(errcode);
2052
2053	jbd2_write_superblock(journal, REQ_SYNC \| REQ_FUA);
2054	}
2055	EXPORT_SYMBOL(jbd2_journal_update_sb_errno);
2056
2057	/**
2058	* jbd2_journal_load() - Read journal from disk.
2059	* @journal: Journal to act on.
2060	*
2061	* Given a journal_t structure which tells us which disk blocks contain
2062	* a journal, read the journal from disk to initialise the in-memory
2063	* structures.
2064	*/
2065	int jbd2_journal_load(journal_t *journal)
2066	{
2067	int err;
2068	journal_superblock_t *sb = journal->j_superblock;
2069
2070	/*
2071	* Create a slab for this blocksize
2072	*/
2073	err = jbd2_journal_create_slab(be32_to_cpu(sb->s_blocksize));
2074	if (err)
2075	return err;
2076
2077	/ Let the recovery code check whether it needs to recover any*
2078	* data from the journal. */
2079	err = jbd2_journal_recover(journal);
2080	if (err) {
2081	pr_warn("JBD2: journal recovery failed\n");
2082	return err;
2083	}
2084
2085	if (journal->j_failed_commit) {
2086	printk(KERN_ERR "JBD2: journal transaction %u on %s "
2087	"is corrupt.\n", journal->j_failed_commit,
2088	journal->j_devname);
2089	return -EFSCORRUPTED;
2090	}
2091	/*
2092	* clear JBD2_ABORT flag initialized in journal_init_common
2093	* here to update log tail information with the newest seq.
2094	*/
2095	journal->j_flags &= ~JBD2_ABORT;
2096
2097	/ OK, we've finished with the dynamic journal bits:*
2098	* reinitialise the dynamic contents of the superblock in memory
2099	* and reset them on disk. */
2100	err = journal_reset(journal);
2101	if (err) {
2102	pr_warn("JBD2: journal reset failed\n");
2103	return err;
2104	}
2105
2106	journal->j_flags \|= JBD2_LOADED;
2107	return `0`;
2108	}
2109
2110	/**
2111	* jbd2_journal_destroy() - Release a journal_t structure.
2112	* @journal: Journal to act on.
2113	*
2114	* Release a journal_t structure once it is no longer in use by the
2115	* journaled object.
2116	* Return <0 if we couldn't clean up the journal.
2117	*/
2118	int jbd2_journal_destroy(journal_t *journal)
2119	{
2120	int err = `0`;
2121
2122	/ Wait for the commit thread to wake up and die. /
2123	journal_kill_thread(journal);
2124
2125	/ Force a final log commit /
2126	if (journal->j_running_transaction)
2127	jbd2_journal_commit_transaction(journal);
2128
2129	/ Force any old transactions to disk /
2130
2131	/ Totally anal locking here... /
2132	spin_lock(lock: &journal->j_list_lock);
2133	while (journal->j_checkpoint_transactions != NULL) {
2134	spin_unlock(lock: &journal->j_list_lock);
2135	mutex_lock_io(&journal->j_checkpoint_mutex);
2136	err = jbd2_log_do_checkpoint(journal);
2137	mutex_unlock(lock: &journal->j_checkpoint_mutex);
2138	/*
2139	* If checkpointing failed, just free the buffers to avoid
2140	* looping forever
2141	*/
2142	if (err) {
2143	jbd2_journal_destroy_checkpoint(journal);
2144	spin_lock(lock: &journal->j_list_lock);
2145	break;
2146	}
2147	spin_lock(lock: &journal->j_list_lock);
2148	}
2149
2150	J_ASSERT(journal->j_running_transaction == NULL);
2151	J_ASSERT(journal->j_committing_transaction == NULL);
2152	J_ASSERT(journal->j_checkpoint_transactions == NULL);
2153	spin_unlock(lock: &journal->j_list_lock);
2154
2155	/*
2156	* OK, all checkpoint transactions have been checked, now check the
2157	* write out io error flag and abort the journal if some buffer failed
2158	* to write back to the original location, otherwise the filesystem
2159	* may become inconsistent.
2160	*/
2161	if (!is_journal_aborted(journal) &&
2162	test_bit(JBD2_CHECKPOINT_IO_ERROR, &journal->j_atomic_flags))
2163	jbd2_journal_abort(journal, -EIO);
2164
2165	if (journal->j_sb_buffer) {
2166	if (!is_journal_aborted(journal)) {
2167	mutex_lock_io(&journal->j_checkpoint_mutex);
2168
2169	write_lock(&journal->j_state_lock);
2170	journal->j_tail_sequence =
2171	++journal->j_transaction_sequence;
2172	write_unlock(&journal->j_state_lock);
2173
2174	jbd2_mark_journal_empty(journal,
2175	REQ_SYNC \| REQ_PREFLUSH \| REQ_FUA);
2176	mutex_unlock(lock: &journal->j_checkpoint_mutex);
2177	} else
2178	err = -EIO;
2179	brelse(bh: journal->j_sb_buffer);
2180	}
2181
2182	if (journal->j_shrinker) {
2183	percpu_counter_destroy(fbc: &journal->j_checkpoint_jh_count);
2184	shrinker_free(shrinker: journal->j_shrinker);
2185	}
2186	if (journal->j_proc_entry)
2187	jbd2_stats_proc_exit(journal);
2188	iput(journal->j_inode);
2189	if (journal->j_revoke)
2190	jbd2_journal_destroy_revoke(journal);
2191	if (journal->j_chksum_driver)
2192	crypto_free_shash(tfm: journal->j_chksum_driver);
2193	kfree(objp: journal->j_fc_wbuf);
2194	kfree(objp: journal->j_wbuf);
2195	kfree(objp: journal);
2196
2197	return err;
2198	}
2199
2200
2201	/**
2202	* jbd2_journal_check_used_features() - Check if features specified are used.
2203	* @journal: Journal to check.
2204	* @compat: bitmask of compatible features
2205	* @ro: bitmask of features that force read-only mount
2206	* @incompat: bitmask of incompatible features
2207	*
2208	* Check whether the journal uses all of a given set of
2209	* features. Return true (non-zero) if it does.
2210	**/
2211
2212	int jbd2_journal_check_used_features(journal_t journal, unsigned* long compat,
2213	unsigned long ro, unsigned long incompat)
2214	{
2215	journal_superblock_t *sb;
2216
2217	if (!compat && !ro && !incompat)
2218	return `1`;
2219	if (!jbd2_format_support_feature(j: journal))
2220	return `0`;
2221
2222	sb = journal->j_superblock;
2223
2224	if (((be32_to_cpu(sb->s_feature_compat) & compat) == compat) &&
2225	((be32_to_cpu(sb->s_feature_ro_compat) & ro) == ro) &&
2226	((be32_to_cpu(sb->s_feature_incompat) & incompat) == incompat))
2227	return `1`;
2228
2229	return `0`;
2230	}
2231
2232	/**
2233	* jbd2_journal_check_available_features() - Check feature set in journalling layer
2234	* @journal: Journal to check.
2235	* @compat: bitmask of compatible features
2236	* @ro: bitmask of features that force read-only mount
2237	* @incompat: bitmask of incompatible features
2238	*
2239	* Check whether the journaling code supports the use of
2240	* all of a given set of features on this journal. Return true
2241	* (non-zero) if it can. */
2242
2243	int jbd2_journal_check_available_features(journal_t journal, unsigned* long compat,
2244	unsigned long ro, unsigned long incompat)
2245	{
2246	if (!compat && !ro && !incompat)
2247	return `1`;
2248
2249	if (!jbd2_format_support_feature(j: journal))
2250	return `0`;
2251
2252	if ((compat & JBD2_KNOWN_COMPAT_FEATURES) == compat &&
2253	(ro & JBD2_KNOWN_ROCOMPAT_FEATURES) == ro &&
2254	(incompat & JBD2_KNOWN_INCOMPAT_FEATURES) == incompat)
2255	return `1`;
2256
2257	return `0`;
2258	}
2259
2260	static int
2261	jbd2_journal_initialize_fast_commit(journal_t *journal)
2262	{
2263	journal_superblock_t *sb = journal->j_superblock;
2264	unsigned long long num_fc_blks;
2265
2266	num_fc_blks = jbd2_journal_get_num_fc_blks(jsb: sb);
2267	if (journal->j_last - num_fc_blks < JBD2_MIN_JOURNAL_BLOCKS)
2268	return -ENOSPC;
2269
2270	/ Are we called twice? /
2271	WARN_ON(journal->j_fc_wbuf != NULL);
2272	journal->j_fc_wbuf = kmalloc_array(n: num_fc_blks,
2273	size: sizeof(struct buffer_head *), GFP_KERNEL);
2274	if (!journal->j_fc_wbuf)
2275	return -ENOMEM;
2276
2277	journal->j_fc_wbufsize = num_fc_blks;
2278	journal->j_fc_last = journal->j_last;
2279	journal->j_last = journal->j_fc_last - num_fc_blks;
2280	journal->j_fc_first = journal->j_last + `1`;
2281	journal->j_fc_off = `0`;
2282	journal->j_free = journal->j_last - journal->j_first;
2283	journal->j_max_transaction_buffers =
2284	jbd2_journal_get_max_txn_bufs(journal);
2285
2286	return `0`;
2287	}
2288
2289	/**
2290	* jbd2_journal_set_features() - Mark a given journal feature in the superblock
2291	* @journal: Journal to act on.
2292	* @compat: bitmask of compatible features
2293	* @ro: bitmask of features that force read-only mount
2294	* @incompat: bitmask of incompatible features
2295	*
2296	* Mark a given journal feature as present on the
2297	* superblock. Returns true if the requested features could be set.
2298	*
2299	*/
2300
2301	int jbd2_journal_set_features(journal_t journal, unsigned* long compat,
2302	unsigned long ro, unsigned long incompat)
2303	{
2304	#define INCOMPAT_FEATURE_ON(f) \
2305	((incompat & (f)) && !(sb->s_feature_incompat & cpu_to_be32(f)))
2306	#define COMPAT_FEATURE_ON(f) \
2307	((compat & (f)) && !(sb->s_feature_compat & cpu_to_be32(f)))
2308	journal_superblock_t *sb;
2309
2310	if (jbd2_journal_check_used_features(journal, compat, ro, incompat))
2311	return `1`;
2312
2313	if (!jbd2_journal_check_available_features(journal, compat, ro, incompat))
2314	return `0`;
2315
2316	/ If enabling v2 checksums, turn on v3 instead /
2317	if (incompat & JBD2_FEATURE_INCOMPAT_CSUM_V2) {
2318	incompat &= ~JBD2_FEATURE_INCOMPAT_CSUM_V2;
2319	incompat \|= JBD2_FEATURE_INCOMPAT_CSUM_V3;
2320	}
2321
2322	/ Asking for checksumming v3 and v1? Only give them v3. /
2323	if (incompat & JBD2_FEATURE_INCOMPAT_CSUM_V3 &&
2324	compat & JBD2_FEATURE_COMPAT_CHECKSUM)
2325	compat &= ~JBD2_FEATURE_COMPAT_CHECKSUM;
2326
2327	jbd2_debug(`1`, "Setting new features 0x%lx/0x%lx/0x%lx\n",
2328	compat, ro, incompat);
2329
2330	sb = journal->j_superblock;
2331
2332	if (incompat & JBD2_FEATURE_INCOMPAT_FAST_COMMIT) {
2333	if (jbd2_journal_initialize_fast_commit(journal)) {
2334	pr_err("JBD2: Cannot enable fast commits.\n");
2335	return `0`;
2336	}
2337	}
2338
2339	/ Load the checksum driver if necessary /
2340	if ((journal->j_chksum_driver == NULL) &&
2341	INCOMPAT_FEATURE_ON(JBD2_FEATURE_INCOMPAT_CSUM_V3)) {
2342	journal->j_chksum_driver = crypto_alloc_shash(alg_name: "crc32c", type: `0`, mask: `0`);
2343	if (IS_ERR(ptr: journal->j_chksum_driver)) {
2344	printk(KERN_ERR "JBD2: Cannot load crc32c driver.\n");
2345	journal->j_chksum_driver = NULL;
2346	return `0`;
2347	}
2348	/ Precompute checksum seed for all metadata /
2349	journal->j_csum_seed = jbd2_chksum(journal, crc: ~`0`, address: sb->s_uuid,
2350	length: sizeof(sb->s_uuid));
2351	}
2352
2353	lock_buffer(bh: journal->j_sb_buffer);
2354
2355	/ If enabling v3 checksums, update superblock /
2356	if (INCOMPAT_FEATURE_ON(JBD2_FEATURE_INCOMPAT_CSUM_V3)) {
2357	sb->s_checksum_type = JBD2_CRC32C_CHKSUM;
2358	sb->s_feature_compat &=
2359	~cpu_to_be32(JBD2_FEATURE_COMPAT_CHECKSUM);
2360	}
2361
2362	/ If enabling v1 checksums, downgrade superblock /
2363	if (COMPAT_FEATURE_ON(JBD2_FEATURE_COMPAT_CHECKSUM))
2364	sb->s_feature_incompat &=
2365	~cpu_to_be32(JBD2_FEATURE_INCOMPAT_CSUM_V2 \|
2366	JBD2_FEATURE_INCOMPAT_CSUM_V3);
2367
2368	sb->s_feature_compat \|= cpu_to_be32(compat);
2369	sb->s_feature_ro_compat \|= cpu_to_be32(ro);
2370	sb->s_feature_incompat \|= cpu_to_be32(incompat);
2371	unlock_buffer(bh: journal->j_sb_buffer);
2372	journal->j_revoke_records_per_block =
2373	journal_revoke_records_per_block(journal);
2374
2375	return `1`;
2376	#undef COMPAT_FEATURE_ON
2377	#undef INCOMPAT_FEATURE_ON
2378	}
2379
2380	/*
2381	* jbd2_journal_clear_features() - Clear a given journal feature in the
2382	* superblock
2383	* @journal: Journal to act on.
2384	* @compat: bitmask of compatible features
2385	* @ro: bitmask of features that force read-only mount
2386	* @incompat: bitmask of incompatible features
2387	*
2388	* Clear a given journal feature as present on the
2389	* superblock.
2390	*/
2391	void jbd2_journal_clear_features(journal_t journal, unsigned* long compat,
2392	unsigned long ro, unsigned long incompat)
2393	{
2394	journal_superblock_t *sb;
2395
2396	jbd2_debug(`1`, "Clear features 0x%lx/0x%lx/0x%lx\n",
2397	compat, ro, incompat);
2398
2399	sb = journal->j_superblock;
2400
2401	sb->s_feature_compat &= ~cpu_to_be32(compat);
2402	sb->s_feature_ro_compat &= ~cpu_to_be32(ro);
2403	sb->s_feature_incompat &= ~cpu_to_be32(incompat);
2404	journal->j_revoke_records_per_block =
2405	journal_revoke_records_per_block(journal);
2406	}
2407	EXPORT_SYMBOL(jbd2_journal_clear_features);
2408
2409	/**
2410	* jbd2_journal_flush() - Flush journal
2411	* @journal: Journal to act on.
2412	* @flags: optional operation on the journal blocks after the flush (see below)
2413	*
2414	* Flush all data for a given journal to disk and empty the journal.
2415	* Filesystems can use this when remounting readonly to ensure that
2416	* recovery does not need to happen on remount. Optionally, a discard or zeroout
2417	* can be issued on the journal blocks after flushing.
2418	*
2419	* flags:
2420	* JBD2_JOURNAL_FLUSH_DISCARD: issues discards for the journal blocks
2421	* JBD2_JOURNAL_FLUSH_ZEROOUT: issues zeroouts for the journal blocks
2422	*/
2423	int jbd2_journal_flush(journal_t journal, unsigned* int flags)
2424	{
2425	int err = `0`;
2426	transaction_t *transaction = NULL;
2427
2428	write_lock(&journal->j_state_lock);
2429
2430	/ Force everything buffered to the log... /
2431	if (journal->j_running_transaction) {
2432	transaction = journal->j_running_transaction;
2433	__jbd2_log_start_commit(journal, target: transaction->t_tid);
2434	} else if (journal->j_committing_transaction)
2435	transaction = journal->j_committing_transaction;
2436
2437	/ Wait for the log commit to complete... /
2438	if (transaction) {
2439	tid_t tid = transaction->t_tid;
2440
2441	write_unlock(&journal->j_state_lock);
2442	jbd2_log_wait_commit(journal, tid);
2443	} else {
2444	write_unlock(&journal->j_state_lock);
2445	}
2446
2447	/ ...and flush everything in the log out to disk. /
2448	spin_lock(lock: &journal->j_list_lock);
2449	while (!err && journal->j_checkpoint_transactions != NULL) {
2450	spin_unlock(lock: &journal->j_list_lock);
2451	mutex_lock_io(&journal->j_checkpoint_mutex);
2452	err = jbd2_log_do_checkpoint(journal);
2453	mutex_unlock(lock: &journal->j_checkpoint_mutex);
2454	spin_lock(lock: &journal->j_list_lock);
2455	}
2456	spin_unlock(lock: &journal->j_list_lock);
2457
2458	if (is_journal_aborted(journal))
2459	return -EIO;
2460
2461	mutex_lock_io(&journal->j_checkpoint_mutex);
2462	if (!err) {
2463	err = jbd2_cleanup_journal_tail(journal);
2464	if (err < `0`) {
2465	mutex_unlock(lock: &journal->j_checkpoint_mutex);
2466	goto out;
2467	}
2468	err = `0`;
2469	}
2470
2471	/ Finally, mark the journal as really needing no recovery.*
2472	* This sets s_start==0 in the underlying superblock, which is
2473	* the magic code for a fully-recovered superblock. Any future
2474	* commits of data to the journal will restore the current
2475	* s_start value. */
2476	jbd2_mark_journal_empty(journal, REQ_SYNC \| REQ_FUA);
2477
2478	if (flags)
2479	err = __jbd2_journal_erase(journal, flags);
2480
2481	mutex_unlock(lock: &journal->j_checkpoint_mutex);
2482	write_lock(&journal->j_state_lock);
2483	J_ASSERT(!journal->j_running_transaction);
2484	J_ASSERT(!journal->j_committing_transaction);
2485	J_ASSERT(!journal->j_checkpoint_transactions);
2486	J_ASSERT(journal->j_head == journal->j_tail);
2487	J_ASSERT(journal->j_tail_sequence == journal->j_transaction_sequence);
2488	write_unlock(&journal->j_state_lock);
2489	out:
2490	return err;
2491	}
2492
2493	/**
2494	* jbd2_journal_wipe() - Wipe journal contents
2495	* @journal: Journal to act on.
2496	* @write: flag (see below)
2497	*
2498	* Wipe out all of the contents of a journal, safely. This will produce
2499	* a warning if the journal contains any valid recovery information.
2500	* Must be called between journal_init_*() and jbd2_journal_load().
2501	*
2502	* If 'write' is non-zero, then we wipe out the journal on disk; otherwise
2503	* we merely suppress recovery.
2504	*/
2505
2506	int jbd2_journal_wipe(journal_t journal, int* write)
2507	{
2508	int err;
2509
2510	J_ASSERT (!(journal->j_flags & JBD2_LOADED));
2511
2512	if (!journal->j_tail)
2513	return `0`;
2514
2515	printk(KERN_WARNING "JBD2: %s recovery information on journal\n",
2516	write ? "Clearing" : "Ignoring");
2517
2518	err = jbd2_journal_skip_recovery(journal);
2519	if (write) {
2520	/ Lock to make assertions happy... /
2521	mutex_lock_io(&journal->j_checkpoint_mutex);
2522	jbd2_mark_journal_empty(journal, REQ_SYNC \| REQ_FUA);
2523	mutex_unlock(lock: &journal->j_checkpoint_mutex);
2524	}
2525
2526	return err;
2527	}
2528
2529	/**
2530	* jbd2_journal_abort () - Shutdown the journal immediately.
2531	* @journal: the journal to shutdown.
2532	* @errno: an error number to record in the journal indicating
2533	* the reason for the shutdown.
2534	*
2535	* Perform a complete, immediate shutdown of the ENTIRE
2536	* journal (not of a single transaction). This operation cannot be
2537	* undone without closing and reopening the journal.
2538	*
2539	* The jbd2_journal_abort function is intended to support higher level error
2540	* recovery mechanisms such as the ext2/ext3 remount-readonly error
2541	* mode.
2542	*
2543	* Journal abort has very specific semantics. Any existing dirty,
2544	* unjournaled buffers in the main filesystem will still be written to
2545	* disk by bdflush, but the journaling mechanism will be suspended
2546	* immediately and no further transaction commits will be honoured.
2547	*
2548	* Any dirty, journaled buffers will be written back to disk without
2549	* hitting the journal. Atomicity cannot be guaranteed on an aborted
2550	* filesystem, but we _do_ attempt to leave as much data as possible
2551	* behind for fsck to use for cleanup.
2552	*
2553	* Any attempt to get a new transaction handle on a journal which is in
2554	* ABORT state will just result in an -EROFS error return. A
2555	* jbd2_journal_stop on an existing handle will return -EIO if we have
2556	* entered abort state during the update.
2557	*
2558	* Recursive transactions are not disturbed by journal abort until the
2559	* final jbd2_journal_stop, which will receive the -EIO error.
2560	*
2561	* Finally, the jbd2_journal_abort call allows the caller to supply an errno
2562	* which will be recorded (if possible) in the journal superblock. This
2563	* allows a client to record failure conditions in the middle of a
2564	* transaction without having to complete the transaction to record the
2565	* failure to disk. ext3_error, for example, now uses this
2566	* functionality.
2567	*
2568	*/
2569
2570	void jbd2_journal_abort(journal_t journal, int* errno)
2571	{
2572	transaction_t *transaction;
2573
2574	/*
2575	* Lock the aborting procedure until everything is done, this avoid
2576	* races between filesystem's error handling flow (e.g. ext4_abort()),
2577	* ensure panic after the error info is written into journal's
2578	* superblock.
2579	*/
2580	mutex_lock(&journal->j_abort_mutex);
2581	/*
2582	* ESHUTDOWN always takes precedence because a file system check
2583	* caused by any other journal abort error is not required after
2584	* a shutdown triggered.
2585	*/
2586	write_lock(&journal->j_state_lock);
2587	if (journal->j_flags & JBD2_ABORT) {
2588	int old_errno = journal->j_errno;
2589
2590	write_unlock(&journal->j_state_lock);
2591	if (old_errno != -ESHUTDOWN && errno == -ESHUTDOWN) {
2592	journal->j_errno = errno;
2593	jbd2_journal_update_sb_errno(journal);
2594	}
2595	mutex_unlock(lock: &journal->j_abort_mutex);
2596	return;
2597	}
2598
2599	/*
2600	* Mark the abort as occurred and start current running transaction
2601	* to release all journaled buffer.
2602	*/
2603	pr_err("Aborting journal on device %s.\n", journal->j_devname);
2604
2605	journal->j_flags \|= JBD2_ABORT;
2606	journal->j_errno = errno;
2607	transaction = journal->j_running_transaction;
2608	if (transaction)
2609	__jbd2_log_start_commit(journal, target: transaction->t_tid);
2610	write_unlock(&journal->j_state_lock);
2611
2612	/*
2613	* Record errno to the journal super block, so that fsck and jbd2
2614	* layer could realise that a filesystem check is needed.
2615	*/
2616	jbd2_journal_update_sb_errno(journal);
2617	mutex_unlock(lock: &journal->j_abort_mutex);
2618	}
2619
2620	/**
2621	* jbd2_journal_errno() - returns the journal's error state.
2622	* @journal: journal to examine.
2623	*
2624	* This is the errno number set with jbd2_journal_abort(), the last
2625	* time the journal was mounted - if the journal was stopped
2626	* without calling abort this will be 0.
2627	*
2628	* If the journal has been aborted on this mount time -EROFS will
2629	* be returned.
2630	*/
2631	int jbd2_journal_errno(journal_t *journal)
2632	{
2633	int err;
2634
2635	read_lock(&journal->j_state_lock);
2636	if (journal->j_flags & JBD2_ABORT)
2637	err = -EROFS;
2638	else
2639	err = journal->j_errno;
2640	read_unlock(&journal->j_state_lock);
2641	return err;
2642	}
2643
2644	/**
2645	* jbd2_journal_clear_err() - clears the journal's error state
2646	* @journal: journal to act on.
2647	*
2648	* An error must be cleared or acked to take a FS out of readonly
2649	* mode.
2650	*/
2651	int jbd2_journal_clear_err(journal_t *journal)
2652	{
2653	int err = `0`;
2654
2655	write_lock(&journal->j_state_lock);
2656	if (journal->j_flags & JBD2_ABORT)
2657	err = -EROFS;
2658	else
2659	journal->j_errno = `0`;
2660	write_unlock(&journal->j_state_lock);
2661	return err;
2662	}
2663
2664	/**
2665	* jbd2_journal_ack_err() - Ack journal err.
2666	* @journal: journal to act on.
2667	*
2668	* An error must be cleared or acked to take a FS out of readonly
2669	* mode.
2670	*/
2671	void jbd2_journal_ack_err(journal_t *journal)
2672	{
2673	write_lock(&journal->j_state_lock);
2674	if (journal->j_errno)
2675	journal->j_flags \|= JBD2_ACK_ERR;
2676	write_unlock(&journal->j_state_lock);
2677	}
2678
2679	int jbd2_journal_blocks_per_page(struct inode *inode)
2680	{
2681	return `1` << (PAGE_SHIFT - inode->i_sb->s_blocksize_bits);
2682	}
2683
2684	/*
2685	* helper functions to deal with 32 or 64bit block numbers.
2686	*/
2687	size_t journal_tag_bytes(journal_t *journal)
2688	{
2689	size_t sz;
2690
2691	if (jbd2_has_feature_csum3(j: journal))
2692	return sizeof(journal_block_tag3_t);
2693
2694	sz = sizeof(journal_block_tag_t);
2695
2696	if (jbd2_has_feature_csum2(j: journal))
2697	sz += sizeof(__u16);
2698
2699	if (jbd2_has_feature_64bit(j: journal))
2700	return sz;
2701	else
2702	return sz - sizeof(__u32);
2703	}
2704
/*
 * JBD memory management
 *
 * These functions are used to allocate block-sized chunks of memory
 * used for making copies of buffer_head data.  Very often it will be
 * page-sized chunks of data, but sometimes it will be in
 * sub-page-size chunks.  (For example, 16k pages on Power systems
 * with a 4k block file system.)  For blocks smaller than a page, we
 * use a SLAB allocator.  There are slab caches for each block size,
 * which are allocated at mount time, if necessary, and we only free
 * (all of) the slab caches when/if the jbd2 module is unloaded.  For
 * this reason we don't need a mutex to protect access to
 * jbd2_slab[] when allocating or releasing memory; one is only needed
 * in jbd2_journal_create_slab().
 *
 * Slot i holds the cache for objects of 1 << (i + 10) bytes, i.e.
 * 1k for slot 0 up to 128k for slot 7.
 */
#define JBD2_MAX_SLABS 8
static struct kmem_cache *jbd2_slab[JBD2_MAX_SLABS];

static const char *jbd2_slab_names[JBD2_MAX_SLABS] = {
	"jbd2_1k",
	"jbd2_2k",
	"jbd2_4k",
	"jbd2_8k",
	"jbd2_16k",
	"jbd2_32k",
	"jbd2_64k",
	"jbd2_128k"
};
2727
2728
2729	static void jbd2_journal_destroy_slabs(void)
2730	{
2731	int i;
2732
2733	for (i = `0`; i < JBD2_MAX_SLABS; i++) {
2734	kmem_cache_destroy(s: jbd2_slab[i]);
2735	jbd2_slab[i] = NULL;
2736	}
2737	}
2738
2739	static int jbd2_journal_create_slab(size_t size)
2740	{
2741	static DEFINE_MUTEX(jbd2_slab_create_mutex);
2742	int i = order_base_2(size) - `10`;
2743	size_t slab_size;
2744
2745	if (size == PAGE_SIZE)
2746	return `0`;
2747
2748	if (i >= JBD2_MAX_SLABS)
2749	return -EINVAL;
2750
2751	if (unlikely(i < `0`))
2752	i = `0`;
2753	mutex_lock(&jbd2_slab_create_mutex);
2754	if (jbd2_slab[i]) {
2755	mutex_unlock(lock: &jbd2_slab_create_mutex);
2756	return `0`; / Already created /
2757	}
2758
2759	slab_size = `1` << (i+`10`);
2760	jbd2_slab[i] = kmem_cache_create(name: jbd2_slab_names[i], size: slab_size,
2761	align: slab_size, flags: `0`, NULL);
2762	mutex_unlock(lock: &jbd2_slab_create_mutex);
2763	if (!jbd2_slab[i]) {
2764	printk(KERN_EMERG "JBD2: no memory for jbd2_slab cache\n");
2765	return -ENOMEM;
2766	}
2767	return `0`;
2768	}
2769
2770	static struct kmem_cache *get_slab(size_t size)
2771	{
2772	int i = order_base_2(size) - `10`;
2773
2774	BUG_ON(i >= JBD2_MAX_SLABS);
2775	if (unlikely(i < `0`))
2776	i = `0`;
2777	BUG_ON(jbd2_slab[i] == NULL);
2778	return jbd2_slab[i];
2779	}
2780
2781	void *jbd2_alloc(size_t size, gfp_t flags)
2782	{
2783	void *ptr;
2784
2785	BUG_ON(size & (size-`1`)); / Must be a power of 2 /
2786
2787	if (size < PAGE_SIZE)
2788	ptr = kmem_cache_alloc(cachep: get_slab(size), flags);
2789	else
2790	ptr = (void *)__get_free_pages(gfp_mask: flags, order: get_order(size));
2791
2792	/ Check alignment; SLUB has gotten this wrong in the past,*
2793	* and this can lead to user data corruption! */
2794	BUG_ON(((unsigned long) ptr) & (size-`1`));
2795
2796	return ptr;
2797	}
2798
2799	void jbd2_free(void *ptr, size_t size)
2800	{
2801	if (size < PAGE_SIZE)
2802	kmem_cache_free(s: get_slab(size), objp: ptr);
2803	else
2804	free_pages(addr: (unsigned long)ptr, order: get_order(size));
2805	};
2806
/*
 * Journal_head storage management
 *
 * All journal_head objects come from one dedicated cache. With
 * CONFIG_JBD2_DEBUG a counter is kept alongside it; it is incremented
 * on allocation and decremented on free.
 */
static struct kmem_cache *jbd2_journal_head_cache;
#ifdef CONFIG_JBD2_DEBUG
static atomic_t nr_journal_heads = ATOMIC_INIT(0);
#endif
2814
2815	static int __init jbd2_journal_init_journal_head_cache(void)
2816	{
2817	J_ASSERT(!jbd2_journal_head_cache);
2818	jbd2_journal_head_cache = kmem_cache_create(name: "jbd2_journal_head",
2819	size: sizeof(struct journal_head),
2820	align: `0`, / offset /
2821	SLAB_TEMPORARY \| SLAB_TYPESAFE_BY_RCU,
2822	NULL); / ctor /
2823	if (!jbd2_journal_head_cache) {
2824	printk(KERN_EMERG "JBD2: no memory for journal_head cache\n");
2825	return -ENOMEM;
2826	}
2827	return `0`;
2828	}
2829
2830	static void jbd2_journal_destroy_journal_head_cache(void)
2831	{
2832	kmem_cache_destroy(s: jbd2_journal_head_cache);
2833	jbd2_journal_head_cache = NULL;
2834	}
2835
2836	/*
2837	* journal_head splicing and dicing
2838	*/
2839	static struct journal_head journal_alloc_journal_head(void*)
2840	{
2841	struct journal_head *ret;
2842
2843	#ifdef CONFIG_JBD2_DEBUG
2844	atomic_inc(v: &nr_journal_heads);
2845	#endif
2846	ret = kmem_cache_zalloc(k: jbd2_journal_head_cache, GFP_NOFS);
2847	if (!ret) {
2848	jbd2_debug(`1`, "out of memory for journal_head\n");
2849	pr_notice_ratelimited("ENOMEM in %s, retrying.\n", __func__);
2850	ret = kmem_cache_zalloc(k: jbd2_journal_head_cache,
2851	GFP_NOFS \| __GFP_NOFAIL);
2852	}
2853	if (ret)
2854	spin_lock_init(&ret->b_state_lock);
2855	return ret;
2856	}
2857
2858	static void journal_free_journal_head(struct journal_head *jh)
2859	{
2860	#ifdef CONFIG_JBD2_DEBUG
2861	atomic_dec(v: &nr_journal_heads);
2862	memset(jh, JBD2_POISON_FREE, sizeof(*jh));
2863	#endif
2864	kmem_cache_free(s: jbd2_journal_head_cache, objp: jh);
2865	}
2866
2867	/*
2868	* A journal_head is attached to a buffer_head whenever JBD has an
2869	* interest in the buffer.
2870	*
2871	* Whenever a buffer has an attached journal_head, its ->b_state:BH_JBD bit
2872	* is set. This bit is tested in core kernel code where we need to take
2873	* JBD-specific actions. Testing the zeroness of ->b_private is not reliable
2874	* there.
2875	*
2876	* When a buffer has its BH_JBD bit set, its ->b_count is elevated by one.
2877	*
2878	* When a buffer has its BH_JBD bit set it is immune from being released by
2879	* core kernel code, mainly via ->b_count.
2880	*
2881	* A journal_head is detached from its buffer_head when the journal_head's
2882	* b_jcount reaches zero. Running transaction (b_transaction) and checkpoint
2883	* transaction (b_cp_transaction) hold their references to b_jcount.
2884	*
2885	* Various places in the kernel want to attach a journal_head to a buffer_head
2886	* _before_ attaching the journal_head to a transaction. To protect the
2887	* journal_head in this situation, jbd2_journal_add_journal_head elevates the
2888	* journal_head's b_jcount refcount by one. The caller must call
2889	* jbd2_journal_put_journal_head() to undo this.
2890	*
2891	* So the typical usage would be:
2892	*
2893	* (Attach a journal_head if needed. Increments b_jcount)
2894	* struct journal_head *jh = jbd2_journal_add_journal_head(bh);
2895	* ...
2896	* (Get another reference for transaction)
2897	* jbd2_journal_grab_journal_head(bh);
2898	* jh->b_transaction = xxx;
2899	* (Put original reference)
2900	* jbd2_journal_put_journal_head(jh);
2901	*/
2902
2903	/*
2904	* Give a buffer_head a journal_head.
2905	*
2906	* May sleep.
2907	*/
2908	struct journal_head jbd2_journal_add_journal_head(struct* buffer_head *bh)
2909	{
2910	struct journal_head *jh;
2911	struct journal_head *new_jh = NULL;
2912
2913	repeat:
2914	if (!buffer_jbd(bh))
2915	new_jh = journal_alloc_journal_head();
2916
2917	jbd_lock_bh_journal_head(bh);
2918	if (buffer_jbd(bh)) {
2919	jh = bh2jh(bh);
2920	} else {
2921	J_ASSERT_BH(bh,
2922	(atomic_read(&bh->b_count) > `0`) \|\|
2923	(bh->b_folio && bh->b_folio->mapping));
2924
2925	if (!new_jh) {
2926	jbd_unlock_bh_journal_head(bh);
2927	goto repeat;
2928	}
2929
2930	jh = new_jh;
2931	new_jh = NULL; / We consumed it /
2932	set_buffer_jbd(bh);
2933	bh->b_private = jh;
2934	jh->b_bh = bh;
2935	get_bh(bh);
2936	BUFFER_TRACE(bh, "added journal_head");
2937	}
2938	jh->b_jcount++;
2939	jbd_unlock_bh_journal_head(bh);
2940	if (new_jh)
2941	journal_free_journal_head(jh: new_jh);
2942	return bh->b_private;
2943	}
2944
2945	/*
2946	* Grab a ref against this buffer_head's journal_head. If it ended up not
2947	* having a journal_head, return NULL
2948	*/
2949	struct journal_head jbd2_journal_grab_journal_head(struct* buffer_head *bh)
2950	{
2951	struct journal_head *jh = NULL;
2952
2953	jbd_lock_bh_journal_head(bh);
2954	if (buffer_jbd(bh)) {
2955	jh = bh2jh(bh);
2956	jh->b_jcount++;
2957	}
2958	jbd_unlock_bh_journal_head(bh);
2959	return jh;
2960	}
2961	EXPORT_SYMBOL(jbd2_journal_grab_journal_head);
2962
2963	static void __journal_remove_journal_head(struct buffer_head *bh)
2964	{
2965	struct journal_head *jh = bh2jh(bh);
2966
2967	J_ASSERT_JH(jh, jh->b_transaction == NULL);
2968	J_ASSERT_JH(jh, jh->b_next_transaction == NULL);
2969	J_ASSERT_JH(jh, jh->b_cp_transaction == NULL);
2970	J_ASSERT_JH(jh, jh->b_jlist == BJ_None);
2971	J_ASSERT_BH(bh, buffer_jbd(bh));
2972	J_ASSERT_BH(bh, jh2bh(jh) == bh);
2973	BUFFER_TRACE(bh, "remove journal_head");
2974
2975	/ Unlink before dropping the lock /
2976	bh->b_private = NULL;
2977	jh->b_bh = NULL; / debug, really /
2978	clear_buffer_jbd(bh);
2979	}
2980
2981	static void journal_release_journal_head(struct journal_head *jh, size_t b_size)
2982	{
2983	if (jh->b_frozen_data) {
2984	printk(KERN_WARNING "%s: freeing b_frozen_data\n", __func__);
2985	jbd2_free(ptr: jh->b_frozen_data, size: b_size);
2986	}
2987	if (jh->b_committed_data) {
2988	printk(KERN_WARNING "%s: freeing b_committed_data\n", __func__);
2989	jbd2_free(ptr: jh->b_committed_data, size: b_size);
2990	}
2991	journal_free_journal_head(jh);
2992	}
2993
2994	/*
2995	* Drop a reference on the passed journal_head. If it fell to zero then
2996	* release the journal_head from the buffer_head.
2997	*/
2998	void jbd2_journal_put_journal_head(struct journal_head *jh)
2999	{
3000	struct buffer_head *bh = jh2bh(jh);
3001
3002	jbd_lock_bh_journal_head(bh);
3003	J_ASSERT_JH(jh, jh->b_jcount > `0`);
3004	--jh->b_jcount;
3005	if (!jh->b_jcount) {
3006	__journal_remove_journal_head(bh);
3007	jbd_unlock_bh_journal_head(bh);
3008	journal_release_journal_head(jh, b_size: bh->b_size);
3009	__brelse(bh);
3010	} else {
3011	jbd_unlock_bh_journal_head(bh);
3012	}
3013	}
3014	EXPORT_SYMBOL(jbd2_journal_put_journal_head);
3015
3016	/*
3017	* Initialize jbd inode head
3018	*/
3019	void jbd2_journal_init_jbd_inode(struct jbd2_inode jinode, struct* inode *inode)
3020	{
3021	jinode->i_transaction = NULL;
3022	jinode->i_next_transaction = NULL;
3023	jinode->i_vfs_inode = inode;
3024	jinode->i_flags = `0`;
3025	jinode->i_dirty_start = `0`;
3026	jinode->i_dirty_end = `0`;
3027	INIT_LIST_HEAD(list: &jinode->i_list);
3028	}
3029
3030	/*
3031	* Function to be called before we start removing inode from memory (i.e.,
3032	* clear_inode() is a fine place to be called from). It removes inode from
3033	* transaction's lists.
3034	*/
3035	void jbd2_journal_release_jbd_inode(journal_t *journal,
3036	struct jbd2_inode *jinode)
3037	{
3038	if (!journal)
3039	return;
3040	restart:
3041	spin_lock(lock: &journal->j_list_lock);
3042	/ Is commit writing out inode - we have to wait /
3043	if (jinode->i_flags & JI_COMMIT_RUNNING) {
3044	wait_queue_head_t *wq;
3045	DEFINE_WAIT_BIT(wait, &jinode->i_flags, __JI_COMMIT_RUNNING);
3046	wq = bit_waitqueue(word: &jinode->i_flags, __JI_COMMIT_RUNNING);
3047	prepare_to_wait(wq_head: wq, wq_entry: &wait.wq_entry, TASK_UNINTERRUPTIBLE);
3048	spin_unlock(lock: &journal->j_list_lock);
3049	schedule();
3050	finish_wait(wq_head: wq, wq_entry: &wait.wq_entry);
3051	goto restart;
3052	}
3053
3054	if (jinode->i_transaction) {
3055	list_del(entry: &jinode->i_list);
3056	jinode->i_transaction = NULL;
3057	}
3058	spin_unlock(lock: &journal->j_list_lock);
3059	}
3060
3061
3062	#ifdef CONFIG_PROC_FS
3063
3064	#define JBD2_STATS_PROC_NAME "fs/jbd2"
3065
3066	static void __init jbd2_create_jbd_stats_proc_entry(void)
3067	{
3068	proc_jbd2_stats = proc_mkdir(JBD2_STATS_PROC_NAME, NULL);
3069	}
3070
3071	static void __exit jbd2_remove_jbd_stats_proc_entry(void)
3072	{
3073	if (proc_jbd2_stats)
3074	remove_proc_entry(JBD2_STATS_PROC_NAME, NULL);
3075	}
3076
3077	#else
3078
3079	#define jbd2_create_jbd_stats_proc_entry() do {} while (0)
3080	#define jbd2_remove_jbd_stats_proc_entry() do {} while (0)
3081
3082	#endif
3083
/* Slab caches for jbd2 handles and jbd2 inodes; NULL until created. */
struct kmem_cache *jbd2_handle_cache, *jbd2_inode_cache;
3085
3086	static int __init jbd2_journal_init_inode_cache(void)
3087	{
3088	J_ASSERT(!jbd2_inode_cache);
3089	jbd2_inode_cache = KMEM_CACHE(jbd2_inode, `0`);
3090	if (!jbd2_inode_cache) {
3091	pr_emerg("JBD2: failed to create inode cache\n");
3092	return -ENOMEM;
3093	}
3094	return `0`;
3095	}
3096
3097	static int __init jbd2_journal_init_handle_cache(void)
3098	{
3099	J_ASSERT(!jbd2_handle_cache);
3100	jbd2_handle_cache = KMEM_CACHE(jbd2_journal_handle, SLAB_TEMPORARY);
3101	if (!jbd2_handle_cache) {
3102	printk(KERN_EMERG "JBD2: failed to create handle cache\n");
3103	return -ENOMEM;
3104	}
3105	return `0`;
3106	}
3107
3108	static void jbd2_journal_destroy_inode_cache(void)
3109	{
3110	kmem_cache_destroy(s: jbd2_inode_cache);
3111	jbd2_inode_cache = NULL;
3112	}
3113
3114	static void jbd2_journal_destroy_handle_cache(void)
3115	{
3116	kmem_cache_destroy(s: jbd2_handle_cache);
3117	jbd2_handle_cache = NULL;
3118	}
3119
3120	/*
3121	* Module startup and shutdown
3122	*/
3123
3124	static int __init journal_init_caches(void)
3125	{
3126	int ret;
3127
3128	ret = jbd2_journal_init_revoke_record_cache();
3129	if (ret == `0`)
3130	ret = jbd2_journal_init_revoke_table_cache();
3131	if (ret == `0`)
3132	ret = jbd2_journal_init_journal_head_cache();
3133	if (ret == `0`)
3134	ret = jbd2_journal_init_handle_cache();
3135	if (ret == `0`)
3136	ret = jbd2_journal_init_inode_cache();
3137	if (ret == `0`)
3138	ret = jbd2_journal_init_transaction_cache();
3139	return ret;
3140	}
3141
/*
 * Tear down all jbd2 caches: the revoke record/table, journal_head,
 * handle, inode and transaction caches, followed by the jbd2 slabs.
 */
static void jbd2_journal_destroy_caches(void)
{
	/* Revoke machinery */
	jbd2_journal_destroy_revoke_record_cache();
	jbd2_journal_destroy_revoke_table_cache();

	/* Per-object caches */
	jbd2_journal_destroy_journal_head_cache();
	jbd2_journal_destroy_handle_cache();
	jbd2_journal_destroy_inode_cache();
	jbd2_journal_destroy_transaction_cache();

	jbd2_journal_destroy_slabs();
}
3152
3153	static int __init journal_init(void)
3154	{
3155	int ret;
3156
3157	BUILD_BUG_ON(sizeof(struct journal_superblock_s) != `1024`);
3158
3159	ret = journal_init_caches();
3160	if (ret == `0`) {
3161	jbd2_create_jbd_stats_proc_entry();
3162	} else {
3163	jbd2_journal_destroy_caches();
3164	}
3165	return ret;
3166	}
3167
3168	static void __exit journal_exit(void)
3169	{
3170	#ifdef CONFIG_JBD2_DEBUG
3171	int n = atomic_read(v: &nr_journal_heads);
3172	if (n)
3173	printk(KERN_ERR "JBD2: leaked %d journal_heads!\n", n);
3174	#endif
3175	jbd2_remove_jbd_stats_proc_entry();
3176	jbd2_journal_destroy_caches();
3177	}
3178
3179	MODULE_LICENSE("GPL");
3180	module_init(journal_init);
3181	module_exit(journal_exit);
3182
3183

source code of linux/fs/jbd2/journal.c