osdmap.h 4.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169
  1. #ifndef _FS_CEPH_OSDMAP_H
  2. #define _FS_CEPH_OSDMAP_H
  3. #include <linux/rbtree.h>
  4. #include <linux/ceph/types.h>
  5. #include <linux/ceph/decode.h>
  6. #include <linux/ceph/ceph_fs.h>
  7. #include <linux/crush/crush.h>
  8. /*
  9. * The osd map describes the current membership of the osd cluster and
  10. * specifies the mapping of objects to placement groups and placement
  11. * groups to (sets of) osds. That is, it completely specifies the
  12. * (desired) distribution of all data objects in the system at some
  13. * point in time.
  14. *
  15. * Each map version is identified by an epoch, which increases monotonically.
  16. *
  17. * The map can be updated either via an incremental map (diff) describing
  18. * the change between two successive epochs, or as a fully encoded map.
  19. */
  /*
   * Placement group id: the (pool, seed) pair that ceph_decode_pgid()
   * fills in from an encoded pg.
   */
  struct ceph_pg {
      uint64_t pool;    /* 64-bit pool value */
      uint32_t seed;    /* 32-bit seed value */
  };
  24. #define CEPH_POOL_FLAG_HASHPSPOOL 1
  25. struct ceph_pg_pool_info {
  26. struct rb_node node;
  27. s64 id;
  28. u8 type;
  29. u8 size;
  30. u8 crush_ruleset;
  31. u8 object_hash;
  32. u32 pg_num, pgp_num;
  33. int pg_num_mask, pgp_num_mask;
  34. u64 flags;
  35. char *name;
  36. };
  /*
   * Object locator: a 64-bit pool value plus a string key.
   * NOTE(review): ownership of key and whether it may be NULL are not
   * visible in this header -- confirm with the users of this struct.
   */
  struct ceph_object_locator {
      uint64_t pool;
      char *key;
  };
  41. struct ceph_pg_mapping {
  42. struct rb_node node;
  43. struct ceph_pg pgid;
  44. int len;
  45. int osds[];
  46. };
  47. struct ceph_osdmap {
  48. struct ceph_fsid fsid;
  49. u32 epoch;
  50. u32 mkfs_epoch;
  51. struct ceph_timespec created, modified;
  52. u32 flags; /* CEPH_OSDMAP_* */
  53. u32 max_osd; /* size of osd_state, _offload, _addr arrays */
  54. u8 *osd_state; /* CEPH_OSD_* */
  55. u32 *osd_weight; /* 0 = failed, 0x10000 = 100% normal */
  56. struct ceph_entity_addr *osd_addr;
  57. struct rb_root pg_temp;
  58. struct rb_root pg_pools;
  59. u32 pool_max;
  60. /* the CRUSH map specifies the mapping of placement groups to
  61. * the list of osds that store+replicate them. */
  62. struct crush_map *crush;
  63. };
  /*
   * File layout helpers.
   *
   * Each macro takes a layout object (not a pointer: fields are reached
   * with '.'), passes one fl_* field through le32_to_cpu() and casts the
   * result to __s32.
   */
  #define ceph_file_layout_su(l) \
      ((__s32)le32_to_cpu((l).fl_stripe_unit))
  #define ceph_file_layout_stripe_count(l) \
      ((__s32)le32_to_cpu((l).fl_stripe_count))
  #define ceph_file_layout_object_size(l) \
      ((__s32)le32_to_cpu((l).fl_object_size))
  #define ceph_file_layout_cas_hash(l) \
      ((__s32)le32_to_cpu((l).fl_cas_hash))
  #define ceph_file_layout_object_su(l) \
      ((__s32)le32_to_cpu((l).fl_object_stripe_unit))
  #define ceph_file_layout_pg_pool(l) \
      ((__s32)le32_to_cpu((l).fl_pg_pool))
  76. static inline unsigned ceph_file_layout_stripe_width(struct ceph_file_layout *l)
  77. {
  78. return le32_to_cpu(l->fl_stripe_unit) *
  79. le32_to_cpu(l->fl_stripe_count);
  80. }
  81. /* "period" == bytes before i start on a new set of objects */
  82. static inline unsigned ceph_file_layout_period(struct ceph_file_layout *l)
  83. {
  84. return le32_to_cpu(l->fl_object_size) *
  85. le32_to_cpu(l->fl_stripe_count);
  86. }
  87. static inline int ceph_osd_is_up(struct ceph_osdmap *map, int osd)
  88. {
  89. return (osd < map->max_osd) && (map->osd_state[osd] & CEPH_OSD_UP);
  90. }
  91. static inline bool ceph_osdmap_flag(struct ceph_osdmap *map, int flag)
  92. {
  93. return map && (map->flags & flag);
  94. }
  /*
   * NOTE(review): defined elsewhere; presumably renders the CEPH_OSD_*
   * bits in state into the caller's buffer str (len bytes) and returns
   * a pointer to the text -- confirm against the definition.
   */
  char *ceph_osdmap_state_str(char *str, int len, int state);
  96. static inline struct ceph_entity_addr *ceph_osd_addr(struct ceph_osdmap *map,
  97. int osd)
  98. {
  99. if (osd >= map->max_osd)
  100. return NULL;
  101. return &map->osd_addr[osd];
  102. }
  103. static inline int ceph_decode_pgid(void **p, void *end, struct ceph_pg *pgid)
  104. {
  105. __u8 version;
  106. if (!ceph_has_room(p, end, 1 + 8 + 4 + 4)) {
  107. pr_warning("incomplete pg encoding");
  108. return -EINVAL;
  109. }
  110. version = ceph_decode_8(p);
  111. if (version > 1) {
  112. pr_warning("do not understand pg encoding %d > 1",
  113. (int)version);
  114. return -EINVAL;
  115. }
  116. pgid->pool = ceph_decode_64(p);
  117. pgid->seed = ceph_decode_32(p);
  118. *p += 4; /* skip deprecated preferred value */
  119. return 0;
  120. }
  /*
   * Forward declaration so the prototype below does not introduce
   * struct ceph_messenger with prototype-only scope when no earlier
   * header has declared it.
   */
  struct ceph_messenger;

  /*
   * Map construction, update and teardown; all defined elsewhere.
   * NOTE(review): by their names, osdmap_decode() builds a map from a
   * full encoding in [*p, end), osdmap_apply_incremental() applies an
   * incremental encoding to an existing map, and ceph_osdmap_destroy()
   * releases a map. Error-return conventions and ownership of the map
   * passed to osdmap_apply_incremental() are not visible here -- confirm
   * against the definitions.
   */
  struct ceph_osdmap *osdmap_decode(void **p, void *end);
  struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end,
                                               struct ceph_osdmap *map,
                                               struct ceph_messenger *msgr);
  void ceph_osdmap_destroy(struct ceph_osdmap *map);
  126. /* calculate mapping of a file extent to an object */
  127. extern int ceph_calc_file_object_mapping(struct ceph_file_layout *layout,
  128. u64 off, u64 len,
  129. u64 *bno, u64 *oxoff, u64 *oxlen);
  130. /* calculate mapping of object to a placement group */
  131. extern int ceph_calc_ceph_pg(struct ceph_pg *pg, const char *oid,
  132. struct ceph_osdmap *osdmap, uint64_t pool);
  133. extern int ceph_calc_pg_acting(struct ceph_osdmap *osdmap,
  134. struct ceph_pg pgid,
  135. int *acting);
  136. extern int ceph_calc_pg_primary(struct ceph_osdmap *osdmap,
  137. struct ceph_pg pgid);
  138. extern const char *ceph_pg_pool_name_by_id(struct ceph_osdmap *map, u64 id);
  139. extern int ceph_pg_poolid_by_name(struct ceph_osdmap *map, const char *name);
  140. #endif