1 | /* SPDX-License-Identifier: GPL-2.0-or-later */ |
2 | /* |
3 | * Copyright (C) International Business Machines Corp., 2000-2004 |
4 | * Portions Copyright (C) Christoph Hellwig, 2001-2002 |
5 | */ |
6 | #ifndef _H_JFS_LOGMGR |
7 | #define _H_JFS_LOGMGR |
8 | |
9 | #include <linux/uuid.h> |
10 | |
11 | #include "jfs_filsys.h" |
12 | #include "jfs_lock.h" |
13 | |
14 | /* |
15 | * log manager configuration parameters |
16 | */ |
17 | |
/*
 * log page geometry
 *
 * The page size is derived from its log2 so the two constants cannot
 * drift apart: LOGPSIZE == 1 << L2LOGPSIZE == 4096 bytes.
 */
#define L2LOGPSIZE	12			/* log2 of the log page size */
#define LOGPSIZE	(1 << L2LOGPSIZE)	/* log page size in bytes */

#define LOGPAGES	16	/* Log pages per mounted file system */
23 | |
24 | /* |
25 | * log logical volume |
26 | * |
27 | * a log is used to make the commit operation on journalled |
28 | * files within the same logical volume group atomic. |
29 | * a log is implemented with a logical volume. |
30 | * there is one log per logical volume group. |
31 | * |
32 | * block 0 of the log logical volume is not used (ipl etc). |
33 | * block 1 contains a log "superblock" and is used by logFormat(), |
34 | * lmLogInit(), lmLogShutdown(), and logRedo() to record status |
35 | * of the log but is not otherwise used during normal processing. |
36 | * blocks 2 - (N-1) are used to contain log records. |
37 | * |
38 | * when a volume group is varied-on-line, logRedo() must have |
39 | * been executed before the file systems (logical volumes) in |
40 | * the volume group can be mounted. |
41 | */ |
/*
 *	log superblock (block 1 of logical volume)
 *
 * Block numbers are relative to the start of the log logical volume:
 * block 0 is unused, block 1 holds the superblock and log records
 * start at block 2.
 */
#define	LOGSUPER_B	1	/* block number of the log superblock */
#define	LOGSTART_B	2	/* first block used for log records */

/* presumably the values expected in logsuper.magic / logsuper.version */
#define	LOGMAGIC	0x87654321
#define	LOGVERSION	1

#define MAX_ACTIVE	128	/* Max active file systems sharing log */
52 | |
/*
 * Log superblock layout.
 *
 * Integer fields are declared little-endian (__le32). The leading
 * number in each field comment is the size of the field in bytes.
 */
struct logsuper {
	__le32 magic;		/* 4: log lv identifier */
	__le32 version;		/* 4: version number */
	__le32 serial;		/* 4: log open/mount counter */
	__le32 size;		/* 4: size in number of LOGPSIZE blocks */
	__le32 bsize;		/* 4: logical block size in bytes */
	__le32 l2bsize;		/* 4: log2 of bsize */

	__le32 flag;		/* 4: option */
	__le32 state;		/* 4: state - one of the log state values */

	__le32 end;		/* 4: addr of last log record set by logredo */
	uuid_t uuid;		/* 16: 128-bit journal uuid */
	char label[16];		/* 16: journal label */
	struct {
		uuid_t uuid;	/* 16: one uuid per slot */
	} active[MAX_ACTIVE];	/* 2048: active file systems list */
};
71 | |
/* log flag: commit option (see jfs_filsys.h) */

/* log state (the "state" field of the log superblock) */
#define	LOGMOUNT	0	/* log mounted by lmLogInit() */
#define LOGREDONE	1	/* log shutdown by lmLogShutdown().
				 * log redo completed by logredo().
				 */
#define LOGWRAP		2	/* log wrapped */
#define LOGREADERR	3	/* log read error detected in logredo() */
81 | |
82 | |
83 | /* |
84 | * log logical page |
85 | * |
86 | * (this comment should be rewritten !) |
87 | * the header and trailer structures (h,t) will normally have |
88 | * the same page and eor value. |
89 | * An exception to this occurs when a complete page write is not |
90 | * accomplished on a power failure. Since the hardware may "split write" |
91 | * sectors in the page, any out of order sequence may occur during powerfail |
92 | * and needs to be recognized during log replay. The xor value is |
93 | * an "exclusive or" of all log words in the page up to eor. This |
94 | * 32 bit eor is stored with the top 16 bits in the header and the |
95 | * bottom 16 bits in the trailer. logredo can easily recognize pages |
96 | * that were not completed by reconstructing this eor and checking |
97 | * the log page. |
98 | * |
99 | * Previous versions of the operating system did not allow split |
100 | * writes and detected partially written records in logredo by |
101 | * ordering the updates to the header, trailer, and the move of data |
102 | * into the logdata area. The order: (1) data is moved (2) header |
103 | * is updated (3) trailer is updated. In logredo, when the header |
104 | * differed from the trailer, the header and trailer were reconciled |
105 | * as follows: if h.page != t.page they were set to the smaller of |
106 | * the two and h.eor and t.eor set to 8 (i.e. empty page). if (only) |
107 | * h.eor != t.eor they were set to the smaller of their two values. |
108 | */ |
/*
 * One log page: header (h), log record area (data) and trailer (t).
 * Header and trailer are 8 bytes each, so the record area is the
 * remaining LOGPSIZE - 16 bytes, expressed below in 32-bit words.
 */
struct logpage {
	struct {		/* header */
		__le32 page;	/* 4: log sequence page number */
		__le16 rsrvd;	/* 2: */
		__le16 eor;	/* 2: end-of-log offset of last record write */
	} h;

	/* LOGPSIZE / 4 words per page, minus 4 words of header + trailer */
	__le32 data[LOGPSIZE / 4 - 4];	/* log record area */

	struct {		/* trailer */
		__le32 page;	/* 4: normally the same as h.page */
		__le16 rsrvd;	/* 2: */
		__le16 eor;	/* 2: normally the same as h.eor */
	} t;
};

#define LOGPHDRSIZE	8	/* log page header size */
#define LOGPTLRSIZE	8	/* log page trailer size */
127 | |
128 | |
129 | /* |
130 | * log record |
131 | * |
132 | * (this comment should be rewritten !) |
133 | * jfs uses only "after" log records (only a single writer is allowed |
134 | * in a page, pages are written to temporary paging space if |
135 | * they must be written to disk before commit, and i/o is |
136 | * scheduled for modified pages to their home location after |
137 | * the log records containing the after values and the commit |
138 | * record is written to the log on disk, undo discards the copy |
139 | * in main-memory.) |
140 | * |
141 | * a log record consists of a data area of variable length followed by |
142 | * a descriptor of fixed size LOGRDSIZE bytes. |
143 | * the data area is rounded up to an integral number of 4-bytes and |
144 | * must be no longer than LOGPSIZE. |
145 | * the descriptor is of size of multiple of 4-bytes and aligned on a |
146 | * 4-byte boundary. |
147 | * records are packed one after the other in the data area of log pages. |
148 | * (sometimes a DUMMY record is inserted so that at least one record ends |
149 | * on every page or the longest record is placed on at most two pages). |
150 | * the field eor in page header/trailer points to the byte following |
151 | * the last record on a page. |
152 | */ |
153 | |
/*
 * NOTE: the three groups below are independent value spaces; the same
 * numeric value appears in more than one group (e.g. 0x0040 is
 * LOG_NOREDOINOEXT, LOG_ACL and LOG_ALLOCPXDLIST).
 */

/* log record types */
#define LOG_COMMIT		0x8000
#define LOG_SYNCPT		0x4000
#define LOG_MOUNT		0x2000
#define LOG_REDOPAGE		0x0800
#define LOG_NOREDOPAGE		0x0080
#define LOG_NOREDOINOEXT	0x0040
#define LOG_UPDATEMAP		0x0008
#define LOG_NOREDOFILE		0x0001

/* REDOPAGE/NOREDOPAGE log record data type */
#define	LOG_INODE		0x0001
#define	LOG_XTREE		0x0002
#define	LOG_DTREE		0x0004
#define	LOG_BTROOT		0x0010
#define	LOG_EA			0x0020
#define	LOG_ACL			0x0040
#define	LOG_DATA		0x0080
#define	LOG_NEW			0x0100
#define	LOG_EXTEND		0x0200
#define LOG_RELOCATE		0x0400
#define LOG_DIR_XTREE		0x0800	/* Xtree is in directory inode */

/* UPDATEMAP log record descriptor type */
#define	LOG_ALLOCXADLIST	0x0080
#define	LOG_ALLOCPXDLIST	0x0040
#define	LOG_ALLOCXAD		0x0020
#define	LOG_ALLOCPXD		0x0010
#define	LOG_FREEXADLIST		0x0008
#define	LOG_FREEPXDLIST		0x0004
#define	LOG_FREEXAD		0x0002
#define	LOG_FREEPXD		0x0001
186 | |
187 | |
/*
 * Log record descriptor.
 *
 * A fixed 16-byte type-independent prefix followed by a 20-byte
 * type-dependent union, 36 bytes in total. All integer fields are
 * declared little-endian. The leading number in each field comment is
 * the size of the field in bytes.
 */
struct lrd {
	/*
	 * type independent area
	 */
	__le32 logtid;		/* 4: log transaction identifier */
	__le32 backchain;	/* 4: ptr to prev record of same transaction */
	__le16 type;		/* 2: record type */
	__le16 length;		/* 2: length of data in record (in bytes) */
	__le32 aggregate;	/* 4: file system lv/aggregate */
	/* (16) */

	/*
	 * type dependent area (20)
	 */
	union {

		/*
		 *	COMMIT: commit
		 *
		 * transaction commit: no type-dependent information;
		 */

		/*
		 *	REDOPAGE: after-image
		 *
		 * apply after-image;
		 *
		 * N.B. REDOPAGE, NOREDOPAGE, and UPDATEMAP must be same format;
		 */
		struct {
			__le32 fileset;	/* 4: fileset number */
			__le32 inode;	/* 4: inode number */
			__le16 type;	/* 2: REDOPAGE record type */
			__le16 l2linesize;	/* 2: log2 of line size */
			pxd_t pxd;	/* 8: on-disk page pxd */
		} redopage;	/* (20) */

		/*
		 *	NOREDOPAGE: the page is freed
		 *
		 * do not apply after-image records which precede this record
		 * in the log with the same page block number to this page.
		 *
		 * N.B. REDOPAGE, NOREDOPAGE, and UPDATEMAP must be same format;
		 */
		struct {
			__le32 fileset;	/* 4: fileset number */
			__le32 inode;	/* 4: inode number */
			__le16 type;	/* 2: NOREDOPAGE record type */
			__le16 rsrvd;	/* 2: reserved */
			pxd_t pxd;	/* 8: on-disk page pxd */
		} noredopage;	/* (20) */

		/*
		 *	UPDATEMAP: update block allocation map
		 *
		 * either in-line PXD,
		 * or     out-of-line  XADLIST;
		 *
		 * N.B. REDOPAGE, NOREDOPAGE, and UPDATEMAP must be same format;
		 */
		struct {
			__le32 fileset;	/* 4: fileset number */
			__le32 inode;	/* 4: inode number */
			__le16 type;	/* 2: UPDATEMAP record type */
			__le16 nxd;	/* 2: number of extents */
			pxd_t pxd;	/* 8: pxd */
		} updatemap;	/* (20) */

		/*
		 *	NOREDOINOEXT: the inode extent is freed
		 *
		 * do not apply after-image records which precede this
		 * record in the log with any of the 4 page block
		 * numbers in this inode extent.
		 *
		 * NOTE: The fileset and pxd fields MUST remain in
		 *       the same fields in the REDOPAGE record format.
		 *       (pxd sits at offset 12 of the union in both layouts.)
		 */
		struct {
			__le32 fileset;	/* 4: fileset number */
			__le32 iagnum;	/* 4: IAG number */
			__le32 inoext_idx;	/* 4: inode extent index */
			pxd_t pxd;	/* 8: on-disk page pxd */
		} noredoinoext;	/* (20) */

		/*
		 *	SYNCPT: log sync point
		 *
		 * replay log up to syncpt address specified;
		 */
		struct {
			__le32 sync;	/* 4: syncpt address (0 = here) */
		} syncpt;

		/*
		 *	MOUNT: file system mount
		 *
		 * file system mount: no type-dependent information;
		 */

		/*
		 *	? FREEXTENT: free specified extent(s)
		 *
		 * free specified extent(s) from block allocation map
		 * N.B.: nextents should be length of data/sizeof(xad_t)
		 */
		struct {
			__le32 type;	/* 4: FREEXTENT record type */
			__le32 nextent;	/* 4: number of extents */

			/* data: PXD or XAD list */
		} freextent;

		/*
		 *	? NOREDOFILE: this file is freed
		 *
		 * do not apply records which precede this record in the log
		 * with the same inode number.
		 *
		 * NOREDOFILE must be the first to be written at commit
		 * (last to be read in logredo()) - it prevents
		 * replay of preceding updates of all preceding generations
		 * of the inumber esp. the on-disk inode itself.
		 */
		struct {
			__le32 fileset;	/* 4: fileset number */
			__le32 inode;	/* 4: inode number */
		} noredofile;

		/*
		 *	? NEWPAGE:
		 *
		 * metadata type dependent
		 */
		struct {
			__le32 fileset;	/* 4: fileset number */
			__le32 inode;	/* 4: inode number */
			__le32 type;	/* 4: NEWPAGE record type */
			pxd_t pxd;	/* 8: on-disk page pxd */
		} newpage;

		/*
		 *	? DUMMY: filler
		 *
		 * no type-dependent information
		 */
	} log;
};					/* (36) */

/* size in bytes of the fixed-size log record descriptor */
#define	LOGRDSIZE	(sizeof(struct lrd))
340 | |
/*
 *	line vector descriptor
 *
 * An (offset, length) pair of little-endian 16-bit values.
 * NOTE(review): the unit of both fields is not visible in this header
 * (presumably "lines", cf. l2linesize in the REDOPAGE record) - confirm
 * against the code that fills these in.
 */
struct lvd {
	__le16 offset;
	__le16 length;
};
348 | |
349 | |
/*
 *	log logical volume
 *
 * Runtime descriptor of one log: it carries locks, wait queues, list
 * heads and buffer pointers alongside the log geometry and the current
 * write/sync positions.
 *
 * NOTE(review): the leading "N:" byte counts in the field comments look
 * historical (pointers and wait queues are marked 4, an int is marked
 * 3); do not rely on them for the actual size of a field.
 */
struct jfs_log {

	struct list_head sb_list;/*  This is used to sync metadata
				 *    before writing syncpt.
				 */
	struct list_head journal_list; /* Global list */
	struct bdev_handle *bdev_handle; /* 4: log lv pointer */
	int serial;		/* 4: log mount serial number */

	s64 base;		/* @8: log extent address (inline log ) */
	int size;		/* 4: log size in log page (in page) */
	int l2bsize;		/* 4: log2 of bsize */

	unsigned long flag;	/* 4: flag */

	struct lbuf *lbuf_free;	/* 4: free lbufs */
	wait_queue_head_t free_wait;	/* 4: */

	/* log write */
	int logtid;		/* 4: log tid */
	int page;		/* 4: page number of eol page */
	int eor;		/* 4: eor of last record in eol page */
	struct lbuf *bp;	/* 4: current log page buffer */

	struct mutex loglock;	/* 4: log write serialization lock */

	/* syncpt */
	int nextsync;		/* 4: bytes to write before next syncpt */
	int active;		/* 4: */
	wait_queue_head_t syncwait;	/* 4: */

	/* commit */
	uint cflag;		/* 4: */
	struct list_head cqueue; /* FIFO commit queue */
	struct tblock *flush_tblk; /* tblk we're waiting on for flush */
	int gcrtc;		/* 4: GC_READY transaction count */
	struct tblock *gclrt;	/* 4: latest GC_READY transaction */
	spinlock_t gclock;	/* 4: group commit lock */
	int logsize;		/* 4: log data area size in bytes */
	int lsn;		/* 4: end-of-log */
	int clsn;		/* 4: clsn */
	int syncpt;		/* 4: addr of last syncpt record */
	int sync;		/* 4: addr from last logsync() */
	struct list_head synclist;	/* 8: logsynclist anchor */
	spinlock_t synclock;	/* 4: synclist lock */
	struct lbuf *wqueue;	/* 4: log pageout queue */
	int count;		/* 4: count */
	uuid_t uuid;		/* 16: 128-bit uuid of log device */

	int no_integrity;	/* 3: flag to disable journaling to disk */
};
404 | |
/*
 * Log flag
 *
 * NOTE(review): the values are consecutive integers rather than
 * single-bit masks, so they are presumably bit numbers within
 * jfs_log.flag (an unsigned long) - confirm against the users of
 * these constants.
 */
#define log_INLINELOG	1
#define log_SYNCBARRIER	2
#define log_QUIESCE	3
#define log_FLUSH	4
412 | |
/*
 *		group commit flag
 *
 * Single-bit masks, grouped by the structure they apply to.
 */
/* jfs_log (presumably kept in jfs_log.cflag - TODO confirm) */
#define logGC_PAGEOUT	0x00000001

/* tblock/lbuf */
#define tblkGC_QUEUE		0x0001
#define tblkGC_READY		0x0002
#define tblkGC_COMMIT		0x0004
#define tblkGC_COMMITTED	0x0008
#define tblkGC_EOP		0x0010
#define tblkGC_FREE		0x0020
#define tblkGC_LEADER		0x0040
#define tblkGC_ERROR		0x0080
#define tblkGC_LAZY		0x0100	/* D230860 */
#define tblkGC_UNLOCKED		0x0200	/* D230860 */
430 | |
/*
 *		log cache buffer header
 *
 * Describes one buffered log page: which log it belongs to, its
 * position in the log, the backing page and the queue links.
 *
 * NOTE(review): the leading "N:" byte counts in the field comments look
 * historical (pointers are marked 4); do not rely on them.
 */
struct lbuf {
	struct jfs_log *l_log;	/* 4: log associated with buffer */

	/*
	 * data buffer base area
	 */
	uint l_flag;		/* 4: pageout control flags */

	struct lbuf *l_wqnext;	/* 4: write queue link */
	struct lbuf *l_freelist;	/* 4: freelistlink */

	int l_pn;		/* 4: log page number */
	int l_eor;		/* 4: log record eor */
	int l_ceor;		/* 4: committed log record eor */

	s64 l_blkno;		/* 8: log page block number */
	caddr_t l_ldata;	/* 4: data page */
	struct page *l_page;	/* The page itself */
	uint l_offset;		/* Offset of l_ldata within the page */

	wait_queue_head_t l_ioevent;	/* 4: i/o done event */
};

/*
 * Reuse l_freelist for redrive list: l_redrive_next is only an alias,
 * so both names refer to the same pointer in the buffer header.
 */
#define l_redrive_next l_freelist
459 | |
/*
 *	logsynclist block
 *
 * common logsyncblk prefix for jbuf_t and tblock
 *
 * An object is linked onto a log's sync list through the synclist
 * member; lsn is the log sequence number recorded for it.
 */
struct logsyncblk {
	u16 xflag;		/* flags */
	u16 flag;		/* only meaningful in tblock */
	lid_t lid;		/* lock id */
	s32 lsn;		/* log sequence number */
	struct list_head synclist;	/* log sync list link */
};
472 | |
/*
 *	logsynclist serialization (per log)
 *
 * Thin wrappers around the synclock spinlock embedded in the log:
 *   LOGSYNC_LOCK_INIT(log)      - initialise (log)->synclock
 *   LOGSYNC_LOCK(log, flags)    - spin_lock_irqsave() on it; the saved
 *                                 interrupt state is stored in flags
 *   LOGSYNC_UNLOCK(log, flags)  - spin_unlock_irqrestore() with the
 *                                 flags saved by LOGSYNC_LOCK
 */

#define LOGSYNC_LOCK_INIT(log) spin_lock_init(&(log)->synclock)
#define LOGSYNC_LOCK(log, flags) spin_lock_irqsave(&(log)->synclock, flags)
#define LOGSYNC_UNLOCK(log, flags) \
	spin_unlock_irqrestore(&(log)->synclock, flags)
481 | |
/*
 * logdiff - compute the difference in bytes of lsn from the sync point
 * @diff: signed integer lvalue that receives the result
 * @lsn:  log address to measure
 * @log:  pointer to an object with syncpt and logsize members
 *
 * diff is set to lsn - log->syncpt. A negative difference means lsn lies
 * before syncpt in address order, so log->logsize is added to bring the
 * result back into the range [0, logsize).
 *
 * The body is wrapped in do { } while (0) so that the macro expands to
 * exactly one statement and can be used safely in an unbraced if/else.
 * diff is evaluated more than once; pass a plain variable.
 */
#define logdiff(diff, lsn, log)\
do {\
	(diff) = (lsn) - (log)->syncpt;\
	if ((diff) < 0)\
		(diff) += (log)->logsize;\
} while (0)
489 | |
490 | extern int lmLogOpen(struct super_block *sb); |
491 | extern int lmLogClose(struct super_block *sb); |
492 | extern int lmLogShutdown(struct jfs_log * log); |
493 | extern int lmLogInit(struct jfs_log * log); |
494 | extern int lmLogFormat(struct jfs_log *log, s64 logAddress, int logSize); |
495 | extern int lmGroupCommit(struct jfs_log *, struct tblock *); |
496 | extern int jfsIOWait(void *); |
497 | extern void jfs_flush_journal(struct jfs_log * log, int wait); |
498 | extern void jfs_syncpt(struct jfs_log *log, int hard_sync); |
499 | |
500 | #endif /* _H_JFS_LOGMGR */ |
501 | |