jfs_logmgr.h 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515
  1. /*
  2. * Copyright (C) International Business Machines Corp., 2000-2004
  3. * Portions Copyright (C) Christoph Hellwig, 2001-2002
  4. *
  5. * This program is free software; you can redistribute it and/or modify
  6. * it under the terms of the GNU General Public License as published by
  7. * the Free Software Foundation; either version 2 of the License, or
  8. * (at your option) any later version.
  9. *
  10. * This program is distributed in the hope that it will be useful,
  11. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  12. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
  13. * the GNU General Public License for more details.
  14. *
  15. * You should have received a copy of the GNU General Public License
  16. * along with this program; if not, write to the Free Software
  17. * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
  18. */
  19. #ifndef _H_JFS_LOGMGR
  20. #define _H_JFS_LOGMGR
  21. #include "jfs_filsys.h"
  22. #include "jfs_lock.h"
  23. /*
  24. * log manager configuration parameters
  25. */
  26. /* log page size */
  27. #define LOGPSIZE 4096
  28. #define L2LOGPSIZE 12
  29. #define LOGPAGES 16 /* Log pages per mounted file system */
  30. /*
  31. * log logical volume
  32. *
  33. * a log is used to make the commit operation on journalled
  34. * files within the same logical volume group atomic.
  35. * a log is implemented with a logical volume.
  36. * there is one log per logical volume group.
  37. *
  38. * block 0 of the log logical volume is not used (ipl etc).
  39. * block 1 contains a log "superblock" and is used by logFormat(),
  40. * lmLogInit(), lmLogShutdown(), and logRedo() to record status
  41. * of the log but is not otherwise used during normal processing.
  42. * blocks 2 - (N-1) are used to contain log records.
  43. *
  44. * when a volume group is varied-on-line, logRedo() must have
  45. * been executed before the file systems (logical volumes) in
  46. * the volume group can be mounted.
  47. */
  48. /*
  49. * log superblock (block 1 of logical volume)
  50. */
  51. #define LOGSUPER_B 1
  52. #define LOGSTART_B 2
  53. #define LOGMAGIC 0x87654321
  54. #define LOGVERSION 1
  55. #define MAX_ACTIVE 128 /* Max active file systems sharing log */
  56. struct logsuper {
  57. __le32 magic; /* 4: log lv identifier */
  58. __le32 version; /* 4: version number */
  59. __le32 serial; /* 4: log open/mount counter */
  60. __le32 size; /* 4: size in number of LOGPSIZE blocks */
  61. __le32 bsize; /* 4: logical block size in byte */
  62. __le32 l2bsize; /* 4: log2 of bsize */
  63. __le32 flag; /* 4: option */
  64. __le32 state; /* 4: state - see below */
  65. __le32 end; /* 4: addr of last log record set by logredo */
  66. char uuid[16]; /* 16: 128-bit journal uuid */
  67. char label[16]; /* 16: journal label */
  68. struct {
  69. char uuid[16];
  70. } active[MAX_ACTIVE]; /* 2048: active file systems list */
  71. };
  72. #define NULL_UUID "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"
  73. /* log flag: commit option (see jfs_filsys.h) */
  74. /* log state */
  75. #define LOGMOUNT 0 /* log mounted by lmLogInit() */
  76. #define LOGREDONE 1 /* log shutdown by lmLogShutdown().
  77. * log redo completed by logredo().
  78. */
  79. #define LOGWRAP 2 /* log wrapped */
  80. #define LOGREADERR 3 /* log read error detected in logredo() */
  81. /*
  82. * log logical page
  83. *
  84. * (this comment should be rewritten !)
  85. * the header and trailer structures (h,t) will normally have
  86. * the same page and eor value.
  87. * An exception to this occurs when a complete page write is not
  88. * accomplished on a power failure. Since the hardware may "split write"
  89. * sectors in the page, any out of order sequence may occur during powerfail
 * and needs to be recognized during log replay. The xor value is
 * an "exclusive or" of all log words in the page up to eor. This
 * 32 bit xor is stored with the top 16 bits in the header and the
 * bottom 16 bits in the trailer. logredo can easily recognize pages
 * that were not completed by reconstructing this xor and checking
 * the log page.
  96. *
  97. * Previous versions of the operating system did not allow split
  98. * writes and detected partially written records in logredo by
  99. * ordering the updates to the header, trailer, and the move of data
  100. * into the logdata area. The order: (1) data is moved (2) header
  101. * is updated (3) trailer is updated. In logredo, when the header
  102. * differed from the trailer, the header and trailer were reconciled
  103. * as follows: if h.page != t.page they were set to the smaller of
  104. * the two and h.eor and t.eor set to 8 (i.e. empty page). if (only)
  105. * h.eor != t.eor they were set to the smaller of their two values.
  106. */
  107. struct logpage {
  108. struct { /* header */
  109. __le32 page; /* 4: log sequence page number */
  110. __le16 rsrvd; /* 2: */
  111. __le16 eor; /* 2: end-of-log offset of lasrt record write */
  112. } h;
  113. __le32 data[LOGPSIZE / 4 - 4]; /* log record area */
  114. struct { /* trailer */
  115. __le32 page; /* 4: normally the same as h.page */
  116. __le16 rsrvd; /* 2: */
  117. __le16 eor; /* 2: normally the same as h.eor */
  118. } t;
  119. };
  120. #define LOGPHDRSIZE 8 /* log page header size */
  121. #define LOGPTLRSIZE 8 /* log page trailer size */
  122. /*
  123. * log record
  124. *
  125. * (this comment should be rewritten !)
  126. * jfs uses only "after" log records (only a single writer is allowed
 * in a page, pages are written to temporary paging space
 * if they must be written to disk before commit, and i/o is
  129. * scheduled for modified pages to their home location after
  130. * the log records containing the after values and the commit
  131. * record is written to the log on disk, undo discards the copy
  132. * in main-memory.)
  133. *
  134. * a log record consists of a data area of variable length followed by
  135. * a descriptor of fixed size LOGRDSIZE bytes.
  136. * the data area is rounded up to an integral number of 4-bytes and
  137. * must be no longer than LOGPSIZE.
  138. * the descriptor is of size of multiple of 4-bytes and aligned on a
  139. * 4-byte boundary.
  140. * records are packed one after the other in the data area of log pages.
  141. * (sometimes a DUMMY record is inserted so that at least one record ends
  142. * on every page or the longest record is placed on at most two pages).
  143. * the field eor in page header/trailer points to the byte following
  144. * the last record on a page.
  145. */
  146. /* log record types */
  147. #define LOG_COMMIT 0x8000
  148. #define LOG_SYNCPT 0x4000
  149. #define LOG_MOUNT 0x2000
  150. #define LOG_REDOPAGE 0x0800
  151. #define LOG_NOREDOPAGE 0x0080
  152. #define LOG_NOREDOINOEXT 0x0040
  153. #define LOG_UPDATEMAP 0x0008
  154. #define LOG_NOREDOFILE 0x0001
  155. /* REDOPAGE/NOREDOPAGE log record data type */
  156. #define LOG_INODE 0x0001
  157. #define LOG_XTREE 0x0002
  158. #define LOG_DTREE 0x0004
  159. #define LOG_BTROOT 0x0010
  160. #define LOG_EA 0x0020
  161. #define LOG_ACL 0x0040
  162. #define LOG_DATA 0x0080
  163. #define LOG_NEW 0x0100
  164. #define LOG_EXTEND 0x0200
  165. #define LOG_RELOCATE 0x0400
  166. #define LOG_DIR_XTREE 0x0800 /* Xtree is in directory inode */
  167. /* UPDATEMAP log record descriptor type */
  168. #define LOG_ALLOCXADLIST 0x0080
  169. #define LOG_ALLOCPXDLIST 0x0040
  170. #define LOG_ALLOCXAD 0x0020
  171. #define LOG_ALLOCPXD 0x0010
  172. #define LOG_FREEXADLIST 0x0008
  173. #define LOG_FREEPXDLIST 0x0004
  174. #define LOG_FREEXAD 0x0002
  175. #define LOG_FREEPXD 0x0001
  176. struct lrd {
  177. /*
  178. * type independent area
  179. */
  180. __le32 logtid; /* 4: log transaction identifier */
  181. __le32 backchain; /* 4: ptr to prev record of same transaction */
  182. __le16 type; /* 2: record type */
  183. __le16 length; /* 2: length of data in record (in byte) */
  184. __le32 aggregate; /* 4: file system lv/aggregate */
  185. /* (16) */
  186. /*
  187. * type dependent area (20)
  188. */
  189. union {
  190. /*
  191. * COMMIT: commit
  192. *
  193. * transaction commit: no type-dependent information;
  194. */
  195. /*
  196. * REDOPAGE: after-image
  197. *
  198. * apply after-image;
  199. *
  200. * N.B. REDOPAGE, NOREDOPAGE, and UPDATEMAP must be same format;
  201. */
  202. struct {
  203. __le32 fileset; /* 4: fileset number */
  204. __le32 inode; /* 4: inode number */
  205. __le16 type; /* 2: REDOPAGE record type */
  206. __le16 l2linesize; /* 2: log2 of line size */
  207. pxd_t pxd; /* 8: on-disk page pxd */
  208. } redopage; /* (20) */
  209. /*
  210. * NOREDOPAGE: the page is freed
  211. *
  212. * do not apply after-image records which precede this record
  213. * in the log with the same page block number to this page.
  214. *
  215. * N.B. REDOPAGE, NOREDOPAGE, and UPDATEMAP must be same format;
  216. */
  217. struct {
  218. __le32 fileset; /* 4: fileset number */
  219. __le32 inode; /* 4: inode number */
  220. __le16 type; /* 2: NOREDOPAGE record type */
  221. __le16 rsrvd; /* 2: reserved */
  222. pxd_t pxd; /* 8: on-disk page pxd */
  223. } noredopage; /* (20) */
  224. /*
  225. * UPDATEMAP: update block allocation map
  226. *
  227. * either in-line PXD,
  228. * or out-of-line XADLIST;
  229. *
  230. * N.B. REDOPAGE, NOREDOPAGE, and UPDATEMAP must be same format;
  231. */
  232. struct {
  233. __le32 fileset; /* 4: fileset number */
  234. __le32 inode; /* 4: inode number */
  235. __le16 type; /* 2: UPDATEMAP record type */
  236. __le16 nxd; /* 2: number of extents */
  237. pxd_t pxd; /* 8: pxd */
  238. } updatemap; /* (20) */
  239. /*
  240. * NOREDOINOEXT: the inode extent is freed
  241. *
  242. * do not apply after-image records which precede this
  243. * record in the log with the any of the 4 page block
  244. * numbers in this inode extent.
  245. *
  246. * NOTE: The fileset and pxd fields MUST remain in
  247. * the same fields in the REDOPAGE record format.
  248. *
  249. */
  250. struct {
  251. __le32 fileset; /* 4: fileset number */
  252. __le32 iagnum; /* 4: IAG number */
  253. __le32 inoext_idx; /* 4: inode extent index */
  254. pxd_t pxd; /* 8: on-disk page pxd */
  255. } noredoinoext; /* (20) */
  256. /*
  257. * SYNCPT: log sync point
  258. *
  259. * replay log upto syncpt address specified;
  260. */
  261. struct {
  262. __le32 sync; /* 4: syncpt address (0 = here) */
  263. } syncpt;
  264. /*
  265. * MOUNT: file system mount
  266. *
  267. * file system mount: no type-dependent information;
  268. */
  269. /*
  270. * ? FREEXTENT: free specified extent(s)
  271. *
  272. * free specified extent(s) from block allocation map
  273. * N.B.: nextents should be length of data/sizeof(xad_t)
  274. */
  275. struct {
  276. __le32 type; /* 4: FREEXTENT record type */
  277. __le32 nextent; /* 4: number of extents */
  278. /* data: PXD or XAD list */
  279. } freextent;
  280. /*
  281. * ? NOREDOFILE: this file is freed
  282. *
  283. * do not apply records which precede this record in the log
  284. * with the same inode number.
  285. *
  286. * NOREDILE must be the first to be written at commit
  287. * (last to be read in logredo()) - it prevents
  288. * replay of preceding updates of all preceding generations
  289. * of the inumber esp. the on-disk inode itself,
  290. * but does NOT prevent
  291. * replay of the
  292. */
  293. struct {
  294. __le32 fileset; /* 4: fileset number */
  295. __le32 inode; /* 4: inode number */
  296. } noredofile;
  297. /*
  298. * ? NEWPAGE:
  299. *
  300. * metadata type dependent
  301. */
  302. struct {
  303. __le32 fileset; /* 4: fileset number */
  304. __le32 inode; /* 4: inode number */
  305. __le32 type; /* 4: NEWPAGE record type */
  306. pxd_t pxd; /* 8: on-disk page pxd */
  307. } newpage;
  308. /*
  309. * ? DUMMY: filler
  310. *
  311. * no type-dependent information
  312. */
  313. } log;
  314. }; /* (36) */
  315. #define LOGRDSIZE (sizeof(struct lrd))
  316. /*
  317. * line vector descriptor
  318. */
  319. struct lvd {
  320. __le16 offset;
  321. __le16 length;
  322. };
  323. /*
  324. * log logical volume
  325. */
  326. struct jfs_log {
  327. struct list_head sb_list;/* This is used to sync metadata
  328. * before writing syncpt.
  329. */
  330. struct list_head journal_list; /* Global list */
  331. struct block_device *bdev; /* 4: log lv pointer */
  332. int serial; /* 4: log mount serial number */
  333. s64 base; /* @8: log extent address (inline log ) */
  334. int size; /* 4: log size in log page (in page) */
  335. int l2bsize; /* 4: log2 of bsize */
  336. long flag; /* 4: flag */
  337. struct lbuf *lbuf_free; /* 4: free lbufs */
  338. wait_queue_head_t free_wait; /* 4: */
  339. /* log write */
  340. int logtid; /* 4: log tid */
  341. int page; /* 4: page number of eol page */
  342. int eor; /* 4: eor of last record in eol page */
  343. struct lbuf *bp; /* 4: current log page buffer */
  344. struct mutex loglock; /* 4: log write serialization lock */
  345. /* syncpt */
  346. int nextsync; /* 4: bytes to write before next syncpt */
  347. int active; /* 4: */
  348. wait_queue_head_t syncwait; /* 4: */
  349. /* commit */
  350. uint cflag; /* 4: */
  351. struct list_head cqueue; /* FIFO commit queue */
  352. struct tblock *flush_tblk; /* tblk we're waiting on for flush */
  353. int gcrtc; /* 4: GC_READY transaction count */
  354. struct tblock *gclrt; /* 4: latest GC_READY transaction */
  355. spinlock_t gclock; /* 4: group commit lock */
  356. int logsize; /* 4: log data area size in byte */
  357. int lsn; /* 4: end-of-log */
  358. int clsn; /* 4: clsn */
  359. int syncpt; /* 4: addr of last syncpt record */
  360. int sync; /* 4: addr from last logsync() */
  361. struct list_head synclist; /* 8: logsynclist anchor */
  362. spinlock_t synclock; /* 4: synclist lock */
  363. struct lbuf *wqueue; /* 4: log pageout queue */
  364. int count; /* 4: count */
  365. char uuid[16]; /* 16: 128-bit uuid of log device */
  366. int no_integrity; /* 3: flag to disable journaling to disk */
  367. };
  368. /*
  369. * Log flag
  370. */
  371. #define log_INLINELOG 1
  372. #define log_SYNCBARRIER 2
  373. #define log_QUIESCE 3
  374. #define log_FLUSH 4
  375. /*
  376. * group commit flag
  377. */
  378. /* jfs_log */
  379. #define logGC_PAGEOUT 0x00000001
  380. /* tblock/lbuf */
  381. #define tblkGC_QUEUE 0x0001
  382. #define tblkGC_READY 0x0002
  383. #define tblkGC_COMMIT 0x0004
  384. #define tblkGC_COMMITTED 0x0008
  385. #define tblkGC_EOP 0x0010
  386. #define tblkGC_FREE 0x0020
  387. #define tblkGC_LEADER 0x0040
  388. #define tblkGC_ERROR 0x0080
  389. #define tblkGC_LAZY 0x0100 // D230860
  390. #define tblkGC_UNLOCKED 0x0200 // D230860
  391. /*
  392. * log cache buffer header
  393. */
  394. struct lbuf {
  395. struct jfs_log *l_log; /* 4: log associated with buffer */
  396. /*
  397. * data buffer base area
  398. */
  399. uint l_flag; /* 4: pageout control flags */
  400. struct lbuf *l_wqnext; /* 4: write queue link */
  401. struct lbuf *l_freelist; /* 4: freelistlink */
  402. int l_pn; /* 4: log page number */
  403. int l_eor; /* 4: log record eor */
  404. int l_ceor; /* 4: committed log record eor */
  405. s64 l_blkno; /* 8: log page block number */
  406. caddr_t l_ldata; /* 4: data page */
  407. struct page *l_page; /* The page itself */
  408. uint l_offset; /* Offset of l_ldata within the page */
  409. wait_queue_head_t l_ioevent; /* 4: i/o done event */
  410. };
  411. /* Reuse l_freelist for redrive list */
  412. #define l_redrive_next l_freelist
  413. /*
  414. * logsynclist block
  415. *
  416. * common logsyncblk prefix for jbuf_t and tblock
  417. */
  418. struct logsyncblk {
  419. u16 xflag; /* flags */
  420. u16 flag; /* only meaninful in tblock */
  421. lid_t lid; /* lock id */
  422. s32 lsn; /* log sequence number */
  423. struct list_head synclist; /* log sync list link */
  424. };
  425. /*
  426. * logsynclist serialization (per log)
  427. */
  428. #define LOGSYNC_LOCK_INIT(log) spin_lock_init(&(log)->synclock)
  429. #define LOGSYNC_LOCK(log, flags) spin_lock_irqsave(&(log)->synclock, flags)
  430. #define LOGSYNC_UNLOCK(log, flags) \
  431. spin_unlock_irqrestore(&(log)->synclock, flags)
  432. /* compute the difference in bytes of lsn from sync point */
  433. #define logdiff(diff, lsn, log)\
  434. {\
  435. diff = (lsn) - (log)->syncpt;\
  436. if (diff < 0)\
  437. diff += (log)->logsize;\
  438. }
  439. extern int lmLogOpen(struct super_block *sb);
  440. extern int lmLogClose(struct super_block *sb);
  441. extern int lmLogShutdown(struct jfs_log * log);
  442. extern int lmLogInit(struct jfs_log * log);
  443. extern int lmLogFormat(struct jfs_log *log, s64 logAddress, int logSize);
  444. extern int lmGroupCommit(struct jfs_log *, struct tblock *);
  445. extern int jfsIOWait(void *);
  446. extern void jfs_flush_journal(struct jfs_log * log, int wait);
  447. extern void jfs_syncpt(struct jfs_log *log, int hard_sync);
  448. #endif /* _H_JFS_LOGMGR */