/* bcache.h */
  1. #ifndef _LINUX_BCACHE_H
  2. #define _LINUX_BCACHE_H
  3. /*
  4. * Bcache on disk data structures
  5. */
  6. #include <asm/types.h>
  7. #define BITMASK(name, type, field, offset, size) \
  8. static inline __u64 name(const type *k) \
  9. { return (k->field >> offset) & ~(~0ULL << size); } \
  10. \
  11. static inline void SET_##name(type *k, __u64 v) \
  12. { \
  13. k->field &= ~(~(~0ULL << size) << offset); \
  14. k->field |= (v & ~(~0ULL << size)) << offset; \
  15. }
  16. /* Btree keys - all units are in sectors */
  17. struct bkey {
  18. __u64 high;
  19. __u64 low;
  20. __u64 ptr[];
  21. };
  22. #define KEY_FIELD(name, field, offset, size) \
  23. BITMASK(name, struct bkey, field, offset, size)
  24. #define PTR_FIELD(name, offset, size) \
  25. static inline __u64 name(const struct bkey *k, unsigned i) \
  26. { return (k->ptr[i] >> offset) & ~(~0ULL << size); } \
  27. \
  28. static inline void SET_##name(struct bkey *k, unsigned i, __u64 v) \
  29. { \
  30. k->ptr[i] &= ~(~(~0ULL << size) << offset); \
  31. k->ptr[i] |= (v & ~(~0ULL << size)) << offset; \
  32. }
  33. #define KEY_SIZE_BITS 16
  34. KEY_FIELD(KEY_PTRS, high, 60, 3)
  35. KEY_FIELD(HEADER_SIZE, high, 58, 2)
  36. KEY_FIELD(KEY_CSUM, high, 56, 2)
  37. KEY_FIELD(KEY_PINNED, high, 55, 1)
  38. KEY_FIELD(KEY_DIRTY, high, 36, 1)
  39. KEY_FIELD(KEY_SIZE, high, 20, KEY_SIZE_BITS)
  40. KEY_FIELD(KEY_INODE, high, 0, 20)
  41. /* Next time I change the on disk format, KEY_OFFSET() won't be 64 bits */
  42. static inline __u64 KEY_OFFSET(const struct bkey *k)
  43. {
  44. return k->low;
  45. }
  46. static inline void SET_KEY_OFFSET(struct bkey *k, __u64 v)
  47. {
  48. k->low = v;
  49. }
  50. /*
  51. * The high bit being set is a relic from when we used it to do binary
  52. * searches - it told you where a key started. It's not used anymore,
  53. * and can probably be safely dropped.
  54. */
  55. #define KEY(inode, offset, size) \
  56. ((struct bkey) { \
  57. .high = (1ULL << 63) | ((__u64) (size) << 20) | (inode), \
  58. .low = (offset) \
  59. })
  60. #define ZERO_KEY KEY(0, 0, 0)
  61. #define MAX_KEY_INODE (~(~0 << 20))
  62. #define MAX_KEY_OFFSET (~0ULL >> 1)
  63. #define MAX_KEY KEY(MAX_KEY_INODE, MAX_KEY_OFFSET, 0)
  64. #define KEY_START(k) (KEY_OFFSET(k) - KEY_SIZE(k))
  65. #define START_KEY(k) KEY(KEY_INODE(k), KEY_START(k), 0)
  66. #define PTR_DEV_BITS 12
  67. PTR_FIELD(PTR_DEV, 51, PTR_DEV_BITS)
  68. PTR_FIELD(PTR_OFFSET, 8, 43)
  69. PTR_FIELD(PTR_GEN, 0, 8)
  70. #define PTR_CHECK_DEV ((1 << PTR_DEV_BITS) - 1)
  71. #define PTR(gen, offset, dev) \
  72. ((((__u64) dev) << 51) | ((__u64) offset) << 8 | gen)
  73. /* Bkey utility code */
  74. static inline unsigned long bkey_u64s(const struct bkey *k)
  75. {
  76. return (sizeof(struct bkey) / sizeof(__u64)) + KEY_PTRS(k);
  77. }
  78. static inline unsigned long bkey_bytes(const struct bkey *k)
  79. {
  80. return bkey_u64s(k) * sizeof(__u64);
  81. }
  82. #define bkey_copy(_dest, _src) memcpy(_dest, _src, bkey_bytes(_src))
  83. static inline void bkey_copy_key(struct bkey *dest, const struct bkey *src)
  84. {
  85. SET_KEY_INODE(dest, KEY_INODE(src));
  86. SET_KEY_OFFSET(dest, KEY_OFFSET(src));
  87. }
  88. static inline struct bkey *bkey_next(const struct bkey *k)
  89. {
  90. __u64 *d = (void *) k;
  91. return (struct bkey *) (d + bkey_u64s(k));
  92. }
  93. static inline struct bkey *bkey_last(const struct bkey *k, unsigned nr_keys)
  94. {
  95. __u64 *d = (void *) k;
  96. return (struct bkey *) (d + nr_keys);
  97. }
  98. /* Enough for a key with 6 pointers */
  99. #define BKEY_PAD 8
  100. #define BKEY_PADDED(key) \
  101. union { struct bkey key; __u64 key ## _pad[BKEY_PAD]; }
  102. /* Superblock */
  103. /* Version 0: Cache device
  104. * Version 1: Backing device
  105. * Version 2: Seed pointer into btree node checksum
  106. * Version 3: Cache device with new UUID format
  107. * Version 4: Backing device with data offset
  108. */
  109. #define BCACHE_SB_VERSION_CDEV 0
  110. #define BCACHE_SB_VERSION_BDEV 1
  111. #define BCACHE_SB_VERSION_CDEV_WITH_UUID 3
  112. #define BCACHE_SB_VERSION_BDEV_WITH_OFFSET 4
  113. #define BCACHE_SB_MAX_VERSION 4
  114. #define SB_SECTOR 8
  115. #define SB_SIZE 4096
  116. #define SB_LABEL_SIZE 32
  117. #define SB_JOURNAL_BUCKETS 256U
  118. /* SB_JOURNAL_BUCKETS must be divisible by BITS_PER_LONG */
  119. #define MAX_CACHES_PER_SET 8
  120. #define BDEV_DATA_START_DEFAULT 16 /* sectors */
  121. struct cache_sb {
  122. __u64 csum;
  123. __u64 offset; /* sector where this sb was written */
  124. __u64 version;
  125. __u8 magic[16];
  126. __u8 uuid[16];
  127. union {
  128. __u8 set_uuid[16];
  129. __u64 set_magic;
  130. };
  131. __u8 label[SB_LABEL_SIZE];
  132. __u64 flags;
  133. __u64 seq;
  134. __u64 pad[8];
  135. union {
  136. struct {
  137. /* Cache devices */
  138. __u64 nbuckets; /* device size */
  139. __u16 block_size; /* sectors */
  140. __u16 bucket_size; /* sectors */
  141. __u16 nr_in_set;
  142. __u16 nr_this_dev;
  143. };
  144. struct {
  145. /* Backing devices */
  146. __u64 data_offset;
  147. /*
  148. * block_size from the cache device section is still used by
  149. * backing devices, so don't add anything here until we fix
  150. * things to not need it for backing devices anymore
  151. */
  152. };
  153. };
  154. __u32 last_mount; /* time_t */
  155. __u16 first_bucket;
  156. union {
  157. __u16 njournal_buckets;
  158. __u16 keys;
  159. };
  160. __u64 d[SB_JOURNAL_BUCKETS]; /* journal buckets */
  161. };
  162. static inline _Bool SB_IS_BDEV(const struct cache_sb *sb)
  163. {
  164. return sb->version == BCACHE_SB_VERSION_BDEV
  165. || sb->version == BCACHE_SB_VERSION_BDEV_WITH_OFFSET;
  166. }
  167. BITMASK(CACHE_SYNC, struct cache_sb, flags, 0, 1);
  168. BITMASK(CACHE_DISCARD, struct cache_sb, flags, 1, 1);
  169. BITMASK(CACHE_REPLACEMENT, struct cache_sb, flags, 2, 3);
  170. #define CACHE_REPLACEMENT_LRU 0U
  171. #define CACHE_REPLACEMENT_FIFO 1U
  172. #define CACHE_REPLACEMENT_RANDOM 2U
  173. BITMASK(BDEV_CACHE_MODE, struct cache_sb, flags, 0, 4);
  174. #define CACHE_MODE_WRITETHROUGH 0U
  175. #define CACHE_MODE_WRITEBACK 1U
  176. #define CACHE_MODE_WRITEAROUND 2U
  177. #define CACHE_MODE_NONE 3U
  178. BITMASK(BDEV_STATE, struct cache_sb, flags, 61, 2);
  179. #define BDEV_STATE_NONE 0U
  180. #define BDEV_STATE_CLEAN 1U
  181. #define BDEV_STATE_DIRTY 2U
  182. #define BDEV_STATE_STALE 3U
  183. /*
  184. * Magic numbers
  185. *
  186. * The various other data structures have their own magic numbers, which are
  187. * xored with the first part of the cache set's UUID
  188. */
  189. #define JSET_MAGIC 0x245235c1a3625032ULL
  190. #define PSET_MAGIC 0x6750e15f87337f91ULL
  191. #define BSET_MAGIC 0x90135c78b99e07f5ULL
  192. static inline __u64 jset_magic(struct cache_sb *sb)
  193. {
  194. return sb->set_magic ^ JSET_MAGIC;
  195. }
  196. static inline __u64 pset_magic(struct cache_sb *sb)
  197. {
  198. return sb->set_magic ^ PSET_MAGIC;
  199. }
  200. static inline __u64 bset_magic(struct cache_sb *sb)
  201. {
  202. return sb->set_magic ^ BSET_MAGIC;
  203. }
  204. /*
  205. * Journal
  206. *
  207. * On disk format for a journal entry:
  208. * seq is monotonically increasing; every journal entry has its own unique
  209. * sequence number.
  210. *
  211. * last_seq is the oldest journal entry that still has keys the btree hasn't
  212. * flushed to disk yet.
  213. *
  214. * version is for on disk format changes.
  215. */
  216. #define BCACHE_JSET_VERSION_UUIDv1 1
  217. #define BCACHE_JSET_VERSION_UUID 1 /* Always latest UUID format */
  218. #define BCACHE_JSET_VERSION 1
  219. struct jset {
  220. __u64 csum;
  221. __u64 magic;
  222. __u64 seq;
  223. __u32 version;
  224. __u32 keys;
  225. __u64 last_seq;
  226. BKEY_PADDED(uuid_bucket);
  227. BKEY_PADDED(btree_root);
  228. __u16 btree_level;
  229. __u16 pad[3];
  230. __u64 prio_bucket[MAX_CACHES_PER_SET];
  231. union {
  232. struct bkey start[0];
  233. __u64 d[0];
  234. };
  235. };
  236. /* Bucket prios/gens */
  237. struct prio_set {
  238. __u64 csum;
  239. __u64 magic;
  240. __u64 seq;
  241. __u32 version;
  242. __u32 pad;
  243. __u64 next_bucket;
  244. struct bucket_disk {
  245. __u16 prio;
  246. __u8 gen;
  247. } __attribute((packed)) data[];
  248. };
  249. /* UUIDS - per backing device/flash only volume metadata */
  250. struct uuid_entry {
  251. union {
  252. struct {
  253. __u8 uuid[16];
  254. __u8 label[32];
  255. __u32 first_reg;
  256. __u32 last_reg;
  257. __u32 invalidated;
  258. __u32 flags;
  259. /* Size of flash only volumes */
  260. __u64 sectors;
  261. };
  262. __u8 pad[128];
  263. };
  264. };
  265. BITMASK(UUID_FLASH_ONLY, struct uuid_entry, flags, 0, 1);
  266. /* Btree nodes */
  267. /* Version 1: Seed pointer into btree node checksum
  268. */
  269. #define BCACHE_BSET_CSUM 1
  270. #define BCACHE_BSET_VERSION 1
  271. /*
  272. * Btree nodes
  273. *
  274. * On disk a btree node is a list/log of these; within each set the keys are
  275. * sorted
  276. */
  277. struct bset {
  278. __u64 csum;
  279. __u64 magic;
  280. __u64 seq;
  281. __u32 version;
  282. __u32 keys;
  283. union {
  284. struct bkey start[0];
  285. __u64 d[0];
  286. };
  287. };
  288. /* OBSOLETE */
  289. /* UUIDS - per backing device/flash only volume metadata */
  290. struct uuid_entry_v0 {
  291. __u8 uuid[16];
  292. __u8 label[32];
  293. __u32 first_reg;
  294. __u32 last_reg;
  295. __u32 invalidated;
  296. __u32 pad;
  297. };
  298. #endif /* _LINUX_BCACHE_H */