ctree.c 31 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146114711481149115011511152115311541155115611571158115911601161116211631164116511661167116811691170117111721173117411751176117711781179118011811182118311841185118611871188118911901191119211931194119511961197119811991200120112021203120412051206120712081209121012111212
  1. #include <stdio.h>
  2. #include <stdlib.h>
  3. #include "kerncompat.h"
  4. #include "radix-tree.h"
  5. #include "ctree.h"
  6. #include "disk-io.h"
  7. #include "print-tree.h"
  8. static int split_node(struct ctree_root *root, struct ctree_path *path,
  9. int level);
  10. static int split_leaf(struct ctree_root *root, struct ctree_path *path,
  11. int data_size);
  12. static int push_node_left(struct ctree_root *root, struct tree_buffer *dst,
  13. struct tree_buffer *src);
  14. static int del_ptr(struct ctree_root *root, struct ctree_path *path, int level,
  15. int slot);
  16. inline void init_path(struct ctree_path *p)
  17. {
  18. memset(p, 0, sizeof(*p));
  19. }
  20. void release_path(struct ctree_root *root, struct ctree_path *p)
  21. {
  22. int i;
  23. for (i = 0; i < MAX_LEVEL; i++) {
  24. if (!p->nodes[i])
  25. break;
  26. tree_block_release(root, p->nodes[i]);
  27. }
  28. memset(p, 0, sizeof(*p));
  29. }
  30. /*
  31. * The leaf data grows from end-to-front in the node.
  32. * this returns the address of the start of the last item,
  33. * which is the stop of the leaf data stack
  34. */
  35. static inline unsigned int leaf_data_end(struct leaf *leaf)
  36. {
  37. unsigned int nr = leaf->header.nritems;
  38. if (nr == 0)
  39. return sizeof(leaf->data);
  40. return leaf->items[nr-1].offset;
  41. }
  42. /*
  43. * The space between the end of the leaf items and
  44. * the start of the leaf data. IOW, how much room
  45. * the leaf has left for both items and data
  46. */
  47. int leaf_free_space(struct leaf *leaf)
  48. {
  49. int data_end = leaf_data_end(leaf);
  50. int nritems = leaf->header.nritems;
  51. char *items_end = (char *)(leaf->items + nritems + 1);
  52. return (char *)(leaf->data + data_end) - (char *)items_end;
  53. }
  54. /*
  55. * compare two keys in a memcmp fashion
  56. */
  57. int comp_keys(struct key *k1, struct key *k2)
  58. {
  59. if (k1->objectid > k2->objectid)
  60. return 1;
  61. if (k1->objectid < k2->objectid)
  62. return -1;
  63. if (k1->flags > k2->flags)
  64. return 1;
  65. if (k1->flags < k2->flags)
  66. return -1;
  67. if (k1->offset > k2->offset)
  68. return 1;
  69. if (k1->offset < k2->offset)
  70. return -1;
  71. return 0;
  72. }
  73. int check_node(struct ctree_path *path, int level)
  74. {
  75. int i;
  76. struct node *parent = NULL;
  77. struct node *node = &path->nodes[level]->node;
  78. int parent_slot;
  79. if (path->nodes[level + 1])
  80. parent = &path->nodes[level + 1]->node;
  81. parent_slot = path->slots[level + 1];
  82. if (parent && node->header.nritems > 0) {
  83. struct key *parent_key;
  84. parent_key = &parent->keys[parent_slot];
  85. BUG_ON(memcmp(parent_key, node->keys, sizeof(struct key)));
  86. BUG_ON(parent->blockptrs[parent_slot] != node->header.blocknr);
  87. }
  88. BUG_ON(node->header.nritems > NODEPTRS_PER_BLOCK);
  89. for (i = 0; i < node->header.nritems - 2; i++) {
  90. BUG_ON(comp_keys(&node->keys[i], &node->keys[i+1]) >= 0);
  91. }
  92. return 0;
  93. }
  94. int check_leaf(struct ctree_path *path, int level)
  95. {
  96. int i;
  97. struct leaf *leaf = &path->nodes[level]->leaf;
  98. struct node *parent = NULL;
  99. int parent_slot;
  100. if (path->nodes[level + 1])
  101. parent = &path->nodes[level + 1]->node;
  102. parent_slot = path->slots[level + 1];
  103. if (parent && leaf->header.nritems > 0) {
  104. struct key *parent_key;
  105. parent_key = &parent->keys[parent_slot];
  106. BUG_ON(memcmp(parent_key, &leaf->items[0].key,
  107. sizeof(struct key)));
  108. BUG_ON(parent->blockptrs[parent_slot] != leaf->header.blocknr);
  109. }
  110. for (i = 0; i < leaf->header.nritems - 2; i++) {
  111. BUG_ON(comp_keys(&leaf->items[i].key,
  112. &leaf->items[i+1].key) >= 0);
  113. BUG_ON(leaf->items[i].offset != leaf->items[i + 1].offset +
  114. leaf->items[i + 1].size);
  115. if (i == 0) {
  116. BUG_ON(leaf->items[i].offset + leaf->items[i].size !=
  117. LEAF_DATA_SIZE);
  118. }
  119. }
  120. BUG_ON(leaf_free_space(leaf) < 0);
  121. return 0;
  122. }
  123. int check_block(struct ctree_path *path, int level)
  124. {
  125. if (level == 0)
  126. return check_leaf(path, level);
  127. return check_node(path, level);
  128. }
  129. /*
  130. * search for key in the array p. items p are item_size apart
  131. * and there are 'max' items in p
  132. * the slot in the array is returned via slot, and it points to
  133. * the place where you would insert key if it is not found in
  134. * the array.
  135. *
  136. * slot may point to max if the key is bigger than all of the keys
  137. */
  138. int generic_bin_search(char *p, int item_size, struct key *key,
  139. int max, int *slot)
  140. {
  141. int low = 0;
  142. int high = max;
  143. int mid;
  144. int ret;
  145. struct key *tmp;
  146. while(low < high) {
  147. mid = (low + high) / 2;
  148. tmp = (struct key *)(p + mid * item_size);
  149. ret = comp_keys(tmp, key);
  150. if (ret < 0)
  151. low = mid + 1;
  152. else if (ret > 0)
  153. high = mid;
  154. else {
  155. *slot = mid;
  156. return 0;
  157. }
  158. }
  159. *slot = low;
  160. return 1;
  161. }
  162. /*
  163. * simple bin_search frontend that does the right thing for
  164. * leaves vs nodes
  165. */
  166. int bin_search(struct node *c, struct key *key, int *slot)
  167. {
  168. if (is_leaf(c->header.flags)) {
  169. struct leaf *l = (struct leaf *)c;
  170. return generic_bin_search((void *)l->items, sizeof(struct item),
  171. key, c->header.nritems, slot);
  172. } else {
  173. return generic_bin_search((void *)c->keys, sizeof(struct key),
  174. key, c->header.nritems, slot);
  175. }
  176. return -1;
  177. }
  178. struct tree_buffer *read_node_slot(struct ctree_root *root,
  179. struct tree_buffer *parent_buf,
  180. int slot)
  181. {
  182. struct node *node = &parent_buf->node;
  183. if (slot < 0)
  184. return NULL;
  185. if (slot >= node->header.nritems)
  186. return NULL;
  187. return read_tree_block(root, node->blockptrs[slot]);
  188. }
  189. static int balance_level(struct ctree_root *root, struct ctree_path *path,
  190. int level)
  191. {
  192. struct tree_buffer *right_buf;
  193. struct tree_buffer *mid_buf;
  194. struct tree_buffer *left_buf;
  195. struct tree_buffer *parent_buf = NULL;
  196. struct node *right = NULL;
  197. struct node *mid;
  198. struct node *left = NULL;
  199. struct node *parent = NULL;
  200. int ret = 0;
  201. int wret;
  202. int pslot;
  203. int used = 0;
  204. int count;
  205. int orig_slot = path->slots[level];
  206. if (level == 0)
  207. return 0;
  208. mid_buf = path->nodes[level];
  209. mid = &mid_buf->node;
  210. if (level < MAX_LEVEL - 1)
  211. parent_buf = path->nodes[level + 1];
  212. pslot = path->slots[level + 1];
  213. if (!parent_buf) {
  214. struct tree_buffer *child;
  215. u64 blocknr = mid_buf->blocknr;
  216. if (mid->header.nritems != 1)
  217. return 0;
  218. /* promote the child to a root */
  219. child = read_node_slot(root, mid_buf, 0);
  220. BUG_ON(!child);
  221. root->node = child;
  222. path->nodes[level] = NULL;
  223. /* once for the path */
  224. tree_block_release(root, mid_buf);
  225. /* once for the root ptr */
  226. tree_block_release(root, mid_buf);
  227. return free_extent(root, blocknr, 1);
  228. }
  229. parent = &parent_buf->node;
  230. if (mid->header.nritems > NODEPTRS_PER_BLOCK / 4)
  231. return 0;
  232. // print_tree(root, root->node);
  233. left_buf = read_node_slot(root, parent_buf, pslot - 1);
  234. right_buf = read_node_slot(root, parent_buf, pslot + 1);
  235. if (right_buf) {
  236. right = &right_buf->node;
  237. used = right->header.nritems;
  238. count = 1;
  239. }
  240. if (left_buf) {
  241. left = &left_buf->node;
  242. used += left->header.nritems;
  243. orig_slot += left->header.nritems;
  244. count++;
  245. }
  246. if (left_buf)
  247. push_node_left(root, left_buf, mid_buf);
  248. if (right_buf) {
  249. push_node_left(root, mid_buf, right_buf);
  250. if (right->header.nritems == 0) {
  251. u64 blocknr = right_buf->blocknr;
  252. tree_block_release(root, right_buf);
  253. right_buf = NULL;
  254. right = NULL;
  255. wret = del_ptr(root, path, level + 1, pslot + 1);
  256. if (wret)
  257. ret = wret;
  258. wret = free_extent(root, blocknr, 1);
  259. if (wret)
  260. ret = wret;
  261. } else {
  262. memcpy(parent->keys + pslot + 1, right->keys,
  263. sizeof(struct key));
  264. }
  265. }
  266. if (mid->header.nritems == 0) {
  267. u64 blocknr = mid_buf->blocknr;
  268. tree_block_release(root, mid_buf);
  269. mid_buf = NULL;
  270. mid = NULL;
  271. wret = del_ptr(root, path, level + 1, pslot);
  272. if (wret)
  273. ret = wret;
  274. wret = free_extent(root, blocknr, 1);
  275. if (wret)
  276. ret = wret;
  277. } else
  278. memcpy(parent->keys + pslot, mid->keys, sizeof(struct key));
  279. if (left_buf) {
  280. if (left->header.nritems >= orig_slot) {
  281. left_buf->count++; // released below
  282. path->nodes[level] = left_buf;
  283. path->slots[level + 1] -= 1;
  284. path->slots[level] = orig_slot;
  285. if (mid_buf)
  286. tree_block_release(root, mid_buf);
  287. } else {
  288. orig_slot -= left->header.nritems;
  289. path->slots[level] = orig_slot;
  290. }
  291. }
  292. if (right_buf)
  293. tree_block_release(root, right_buf);
  294. if (left_buf)
  295. tree_block_release(root, left_buf);
  296. return ret;
  297. }
  298. /*
  299. * look for key in the tree. path is filled in with nodes along the way
  300. * if key is found, we return zero and you can find the item in the leaf
  301. * level of the path (level 0)
  302. *
  303. * If the key isn't found, the path points to the slot where it should
  304. * be inserted, and 1 is returned. If there are other errors during the
  305. * search a negative error number is returned.
  306. *
  307. * if ins_len > 0, nodes and leaves will be split as we walk down the
  308. * tree. if ins_len < 0, nodes will be merged as we walk down the tree (if
  309. * possible)
  310. */
  311. int search_slot(struct ctree_root *root, struct key *key,
  312. struct ctree_path *p, int ins_len)
  313. {
  314. struct tree_buffer *b;
  315. struct node *c;
  316. int slot;
  317. int ret;
  318. int level;
  319. again:
  320. b = root->node;
  321. b->count++;
  322. while (b) {
  323. c = &b->node;
  324. level = node_level(c->header.flags);
  325. p->nodes[level] = b;
  326. ret = check_block(p, level);
  327. if (ret)
  328. return -1;
  329. ret = bin_search(c, key, &slot);
  330. if (!is_leaf(c->header.flags)) {
  331. if (ret && slot > 0)
  332. slot -= 1;
  333. p->slots[level] = slot;
  334. if (ins_len > 0 &&
  335. c->header.nritems == NODEPTRS_PER_BLOCK) {
  336. int sret = split_node(root, p, level);
  337. BUG_ON(sret > 0);
  338. if (sret)
  339. return sret;
  340. b = p->nodes[level];
  341. c = &b->node;
  342. slot = p->slots[level];
  343. } else if (ins_len < 0) {
  344. int sret = balance_level(root, p, level);
  345. if (sret)
  346. return sret;
  347. b = p->nodes[level];
  348. if (!b)
  349. goto again;
  350. c = &b->node;
  351. slot = p->slots[level];
  352. }
  353. b = read_tree_block(root, c->blockptrs[slot]);
  354. } else {
  355. struct leaf *l = (struct leaf *)c;
  356. p->slots[level] = slot;
  357. if (ins_len > 0 && leaf_free_space(l) <
  358. sizeof(struct item) + ins_len) {
  359. int sret = split_leaf(root, p, ins_len);
  360. BUG_ON(sret > 0);
  361. if (sret)
  362. return sret;
  363. }
  364. BUG_ON(root->node->count == 1);
  365. return ret;
  366. }
  367. }
  368. BUG_ON(root->node->count == 1);
  369. return 1;
  370. }
  371. /*
  372. * adjust the pointers going up the tree, starting at level
  373. * making sure the right key of each node is points to 'key'.
  374. * This is used after shifting pointers to the left, so it stops
  375. * fixing up pointers when a given leaf/node is not in slot 0 of the
  376. * higher levels
  377. *
  378. * If this fails to write a tree block, it returns -1, but continues
  379. * fixing up the blocks in ram so the tree is consistent.
  380. */
  381. static int fixup_low_keys(struct ctree_root *root,
  382. struct ctree_path *path, struct key *key,
  383. int level)
  384. {
  385. int i;
  386. int ret = 0;
  387. int wret;
  388. for (i = level; i < MAX_LEVEL; i++) {
  389. struct node *t;
  390. int tslot = path->slots[i];
  391. if (!path->nodes[i])
  392. break;
  393. t = &path->nodes[i]->node;
  394. memcpy(t->keys + tslot, key, sizeof(*key));
  395. wret = write_tree_block(root, path->nodes[i]);
  396. if (wret)
  397. ret = wret;
  398. if (tslot != 0)
  399. break;
  400. }
  401. return ret;
  402. }
  403. /*
  404. * try to push data from one node into the next node left in the
  405. * tree. The src node is found at specified level in the path.
  406. * If some bytes were pushed, return 0, otherwise return 1.
  407. *
  408. * Lower nodes/leaves in the path are not touched, higher nodes may
  409. * be modified to reflect the push.
  410. *
  411. * The path is altered to reflect the push.
  412. *
  413. * returns 0 if some ptrs were pushed left, < 0 if there was some horrible
  414. * error, and > 0 if there was no room in the left hand block.
  415. */
  416. static int push_node_left(struct ctree_root *root, struct tree_buffer *dst_buf,
  417. struct tree_buffer *src_buf)
  418. {
  419. struct node *src = &src_buf->node;
  420. struct node *dst = &dst_buf->node;
  421. int push_items = 0;
  422. int src_nritems;
  423. int dst_nritems;
  424. int ret = 0;
  425. int wret;
  426. src_nritems = src->header.nritems;
  427. dst_nritems = dst->header.nritems;
  428. push_items = NODEPTRS_PER_BLOCK - dst_nritems;
  429. if (push_items <= 0) {
  430. return 1;
  431. }
  432. if (src_nritems < push_items)
  433. push_items =src_nritems;
  434. memcpy(dst->keys + dst_nritems, src->keys,
  435. push_items * sizeof(struct key));
  436. memcpy(dst->blockptrs + dst_nritems, src->blockptrs,
  437. push_items * sizeof(u64));
  438. if (push_items < src_nritems) {
  439. memmove(src->keys, src->keys + push_items,
  440. (src_nritems - push_items) * sizeof(struct key));
  441. memmove(src->blockptrs, src->blockptrs + push_items,
  442. (src_nritems - push_items) * sizeof(u64));
  443. }
  444. src->header.nritems -= push_items;
  445. dst->header.nritems += push_items;
  446. wret = write_tree_block(root, src_buf);
  447. if (wret < 0)
  448. ret = wret;
  449. wret = write_tree_block(root, dst_buf);
  450. if (wret < 0)
  451. ret = wret;
  452. return ret;
  453. }
  454. /*
  455. * helper function to insert a new root level in the tree.
  456. * A new node is allocated, and a single item is inserted to
  457. * point to the existing root
  458. *
  459. * returns zero on success or < 0 on failure.
  460. */
  461. static int insert_new_root(struct ctree_root *root,
  462. struct ctree_path *path, int level)
  463. {
  464. struct tree_buffer *t;
  465. struct node *lower;
  466. struct node *c;
  467. struct key *lower_key;
  468. BUG_ON(path->nodes[level]);
  469. BUG_ON(path->nodes[level-1] != root->node);
  470. t = alloc_free_block(root);
  471. c = &t->node;
  472. memset(c, 0, sizeof(c));
  473. c->header.nritems = 1;
  474. c->header.flags = node_level(level);
  475. c->header.blocknr = t->blocknr;
  476. c->header.parentid = root->node->node.header.parentid;
  477. lower = &path->nodes[level-1]->node;
  478. if (is_leaf(lower->header.flags))
  479. lower_key = &((struct leaf *)lower)->items[0].key;
  480. else
  481. lower_key = lower->keys;
  482. memcpy(c->keys, lower_key, sizeof(struct key));
  483. c->blockptrs[0] = path->nodes[level-1]->blocknr;
  484. /* the super has an extra ref to root->node */
  485. tree_block_release(root, root->node);
  486. root->node = t;
  487. t->count++;
  488. write_tree_block(root, t);
  489. path->nodes[level] = t;
  490. path->slots[level] = 0;
  491. return 0;
  492. }
  493. /*
  494. * worker function to insert a single pointer in a node.
  495. * the node should have enough room for the pointer already
  496. *
  497. * slot and level indicate where you want the key to go, and
  498. * blocknr is the block the key points to.
  499. *
  500. * returns zero on success and < 0 on any error
  501. */
  502. static int insert_ptr(struct ctree_root *root,
  503. struct ctree_path *path, struct key *key,
  504. u64 blocknr, int slot, int level)
  505. {
  506. struct node *lower;
  507. int nritems;
  508. BUG_ON(!path->nodes[level]);
  509. lower = &path->nodes[level]->node;
  510. nritems = lower->header.nritems;
  511. if (slot > nritems)
  512. BUG();
  513. if (nritems == NODEPTRS_PER_BLOCK)
  514. BUG();
  515. if (slot != nritems) {
  516. memmove(lower->keys + slot + 1, lower->keys + slot,
  517. (nritems - slot) * sizeof(struct key));
  518. memmove(lower->blockptrs + slot + 1, lower->blockptrs + slot,
  519. (nritems - slot) * sizeof(u64));
  520. }
  521. memcpy(lower->keys + slot, key, sizeof(struct key));
  522. lower->blockptrs[slot] = blocknr;
  523. lower->header.nritems++;
  524. if (lower->keys[1].objectid == 0)
  525. BUG();
  526. write_tree_block(root, path->nodes[level]);
  527. return 0;
  528. }
  529. /*
  530. * split the node at the specified level in path in two.
  531. * The path is corrected to point to the appropriate node after the split
  532. *
  533. * Before splitting this tries to make some room in the node by pushing
  534. * left and right, if either one works, it returns right away.
  535. *
  536. * returns 0 on success and < 0 on failure
  537. */
  538. static int split_node(struct ctree_root *root, struct ctree_path *path,
  539. int level)
  540. {
  541. struct tree_buffer *t;
  542. struct node *c;
  543. struct tree_buffer *split_buffer;
  544. struct node *split;
  545. int mid;
  546. int ret;
  547. int wret;
  548. t = path->nodes[level];
  549. c = &t->node;
  550. if (t == root->node) {
  551. /* trying to split the root, lets make a new one */
  552. ret = insert_new_root(root, path, level + 1);
  553. if (ret)
  554. return ret;
  555. }
  556. split_buffer = alloc_free_block(root);
  557. split = &split_buffer->node;
  558. split->header.flags = c->header.flags;
  559. split->header.blocknr = split_buffer->blocknr;
  560. split->header.parentid = root->node->node.header.parentid;
  561. mid = (c->header.nritems + 1) / 2;
  562. memcpy(split->keys, c->keys + mid,
  563. (c->header.nritems - mid) * sizeof(struct key));
  564. memcpy(split->blockptrs, c->blockptrs + mid,
  565. (c->header.nritems - mid) * sizeof(u64));
  566. split->header.nritems = c->header.nritems - mid;
  567. c->header.nritems = mid;
  568. ret = 0;
  569. wret = write_tree_block(root, t);
  570. if (wret)
  571. ret = wret;
  572. wret = write_tree_block(root, split_buffer);
  573. if (wret)
  574. ret = wret;
  575. wret = insert_ptr(root, path, split->keys, split_buffer->blocknr,
  576. path->slots[level + 1] + 1, level + 1);
  577. if (wret)
  578. ret = wret;
  579. if (path->slots[level] >= mid) {
  580. path->slots[level] -= mid;
  581. tree_block_release(root, t);
  582. path->nodes[level] = split_buffer;
  583. path->slots[level + 1] += 1;
  584. } else {
  585. tree_block_release(root, split_buffer);
  586. }
  587. return ret;
  588. }
  589. /*
  590. * how many bytes are required to store the items in a leaf. start
  591. * and nr indicate which items in the leaf to check. This totals up the
  592. * space used both by the item structs and the item data
  593. */
  594. static int leaf_space_used(struct leaf *l, int start, int nr)
  595. {
  596. int data_len;
  597. int end = start + nr - 1;
  598. if (!nr)
  599. return 0;
  600. data_len = l->items[start].offset + l->items[start].size;
  601. data_len = data_len - l->items[end].offset;
  602. data_len += sizeof(struct item) * nr;
  603. return data_len;
  604. }
  605. /*
  606. * push some data in the path leaf to the right, trying to free up at
  607. * least data_size bytes. returns zero if the push worked, nonzero otherwise
  608. *
  609. * returns 1 if the push failed because the other node didn't have enough
  610. * room, 0 if everything worked out and < 0 if there were major errors.
  611. */
  612. static int push_leaf_right(struct ctree_root *root, struct ctree_path *path,
  613. int data_size)
  614. {
  615. struct tree_buffer *left_buf = path->nodes[0];
  616. struct leaf *left = &left_buf->leaf;
  617. struct leaf *right;
  618. struct tree_buffer *right_buf;
  619. struct tree_buffer *upper;
  620. int slot;
  621. int i;
  622. int free_space;
  623. int push_space = 0;
  624. int push_items = 0;
  625. struct item *item;
  626. slot = path->slots[1];
  627. if (!path->nodes[1]) {
  628. return 1;
  629. }
  630. upper = path->nodes[1];
  631. if (slot >= upper->node.header.nritems - 1) {
  632. return 1;
  633. }
  634. right_buf = read_tree_block(root, upper->node.blockptrs[slot + 1]);
  635. right = &right_buf->leaf;
  636. free_space = leaf_free_space(right);
  637. if (free_space < data_size + sizeof(struct item)) {
  638. tree_block_release(root, right_buf);
  639. return 1;
  640. }
  641. for (i = left->header.nritems - 1; i >= 0; i--) {
  642. item = left->items + i;
  643. if (path->slots[0] == i)
  644. push_space += data_size + sizeof(*item);
  645. if (item->size + sizeof(*item) + push_space > free_space)
  646. break;
  647. push_items++;
  648. push_space += item->size + sizeof(*item);
  649. }
  650. if (push_items == 0) {
  651. tree_block_release(root, right_buf);
  652. return 1;
  653. }
  654. /* push left to right */
  655. push_space = left->items[left->header.nritems - push_items].offset +
  656. left->items[left->header.nritems - push_items].size;
  657. push_space -= leaf_data_end(left);
  658. /* make room in the right data area */
  659. memmove(right->data + leaf_data_end(right) - push_space,
  660. right->data + leaf_data_end(right),
  661. LEAF_DATA_SIZE - leaf_data_end(right));
  662. /* copy from the left data area */
  663. memcpy(right->data + LEAF_DATA_SIZE - push_space,
  664. left->data + leaf_data_end(left),
  665. push_space);
  666. memmove(right->items + push_items, right->items,
  667. right->header.nritems * sizeof(struct item));
  668. /* copy the items from left to right */
  669. memcpy(right->items, left->items + left->header.nritems - push_items,
  670. push_items * sizeof(struct item));
  671. /* update the item pointers */
  672. right->header.nritems += push_items;
  673. push_space = LEAF_DATA_SIZE;
  674. for (i = 0; i < right->header.nritems; i++) {
  675. right->items[i].offset = push_space - right->items[i].size;
  676. push_space = right->items[i].offset;
  677. }
  678. left->header.nritems -= push_items;
  679. write_tree_block(root, left_buf);
  680. write_tree_block(root, right_buf);
  681. memcpy(upper->node.keys + slot + 1,
  682. &right->items[0].key, sizeof(struct key));
  683. write_tree_block(root, upper);
  684. /* then fixup the leaf pointer in the path */
  685. if (path->slots[0] >= left->header.nritems) {
  686. path->slots[0] -= left->header.nritems;
  687. tree_block_release(root, path->nodes[0]);
  688. path->nodes[0] = right_buf;
  689. path->slots[1] += 1;
  690. } else {
  691. tree_block_release(root, right_buf);
  692. }
  693. return 0;
  694. }
  695. /*
  696. * push some data in the path leaf to the left, trying to free up at
  697. * least data_size bytes. returns zero if the push worked, nonzero otherwise
  698. */
  699. static int push_leaf_left(struct ctree_root *root, struct ctree_path *path,
  700. int data_size)
  701. {
  702. struct tree_buffer *right_buf = path->nodes[0];
  703. struct leaf *right = &right_buf->leaf;
  704. struct tree_buffer *t;
  705. struct leaf *left;
  706. int slot;
  707. int i;
  708. int free_space;
  709. int push_space = 0;
  710. int push_items = 0;
  711. struct item *item;
  712. int old_left_nritems;
  713. int ret = 0;
  714. int wret;
  715. slot = path->slots[1];
  716. if (slot == 0) {
  717. return 1;
  718. }
  719. if (!path->nodes[1]) {
  720. return 1;
  721. }
  722. t = read_tree_block(root, path->nodes[1]->node.blockptrs[slot - 1]);
  723. left = &t->leaf;
  724. free_space = leaf_free_space(left);
  725. if (free_space < data_size + sizeof(struct item)) {
  726. tree_block_release(root, t);
  727. return 1;
  728. }
  729. for (i = 0; i < right->header.nritems; i++) {
  730. item = right->items + i;
  731. if (path->slots[0] == i)
  732. push_space += data_size + sizeof(*item);
  733. if (item->size + sizeof(*item) + push_space > free_space)
  734. break;
  735. push_items++;
  736. push_space += item->size + sizeof(*item);
  737. }
  738. if (push_items == 0) {
  739. tree_block_release(root, t);
  740. return 1;
  741. }
  742. /* push data from right to left */
  743. memcpy(left->items + left->header.nritems,
  744. right->items, push_items * sizeof(struct item));
  745. push_space = LEAF_DATA_SIZE - right->items[push_items -1].offset;
  746. memcpy(left->data + leaf_data_end(left) - push_space,
  747. right->data + right->items[push_items - 1].offset,
  748. push_space);
  749. old_left_nritems = left->header.nritems;
  750. BUG_ON(old_left_nritems < 0);
  751. for(i = old_left_nritems; i < old_left_nritems + push_items; i++) {
  752. left->items[i].offset -= LEAF_DATA_SIZE -
  753. left->items[old_left_nritems -1].offset;
  754. }
  755. left->header.nritems += push_items;
  756. /* fixup right node */
  757. push_space = right->items[push_items-1].offset - leaf_data_end(right);
  758. memmove(right->data + LEAF_DATA_SIZE - push_space, right->data +
  759. leaf_data_end(right), push_space);
  760. memmove(right->items, right->items + push_items,
  761. (right->header.nritems - push_items) * sizeof(struct item));
  762. right->header.nritems -= push_items;
  763. push_space = LEAF_DATA_SIZE;
  764. for (i = 0; i < right->header.nritems; i++) {
  765. right->items[i].offset = push_space - right->items[i].size;
  766. push_space = right->items[i].offset;
  767. }
  768. wret = write_tree_block(root, t);
  769. if (wret)
  770. ret = wret;
  771. wret = write_tree_block(root, right_buf);
  772. if (wret)
  773. ret = wret;
  774. wret = fixup_low_keys(root, path, &right->items[0].key, 1);
  775. if (wret)
  776. ret = wret;
  777. /* then fixup the leaf pointer in the path */
  778. if (path->slots[0] < push_items) {
  779. path->slots[0] += old_left_nritems;
  780. tree_block_release(root, path->nodes[0]);
  781. path->nodes[0] = t;
  782. path->slots[1] -= 1;
  783. } else {
  784. tree_block_release(root, t);
  785. path->slots[0] -= push_items;
  786. }
  787. BUG_ON(path->slots[0] < 0);
  788. return ret;
  789. }
  790. /*
  791. * split the path's leaf in two, making sure there is at least data_size
  792. * available for the resulting leaf level of the path.
  793. *
  794. * returns 0 if all went well and < 0 on failure.
  795. */
  796. static int split_leaf(struct ctree_root *root, struct ctree_path *path,
  797. int data_size)
  798. {
  799. struct tree_buffer *l_buf;
  800. struct leaf *l;
  801. int nritems;
  802. int mid;
  803. int slot;
  804. struct leaf *right;
  805. struct tree_buffer *right_buffer;
  806. int space_needed = data_size + sizeof(struct item);
  807. int data_copy_size;
  808. int rt_data_off;
  809. int i;
  810. int ret;
  811. int wret;
  812. wret = push_leaf_left(root, path, data_size);
  813. if (wret < 0)
  814. return wret;
  815. if (wret) {
  816. wret = push_leaf_right(root, path, data_size);
  817. if (wret < 0)
  818. return wret;
  819. }
  820. l_buf = path->nodes[0];
  821. l = &l_buf->leaf;
  822. /* did the pushes work? */
  823. if (leaf_free_space(l) >= sizeof(struct item) + data_size)
  824. return 0;
  825. if (!path->nodes[1]) {
  826. ret = insert_new_root(root, path, 1);
  827. if (ret)
  828. return ret;
  829. }
  830. slot = path->slots[0];
  831. nritems = l->header.nritems;
  832. mid = (nritems + 1)/ 2;
  833. right_buffer = alloc_free_block(root);
  834. BUG_ON(!right_buffer);
  835. BUG_ON(mid == nritems);
  836. right = &right_buffer->leaf;
  837. memset(right, 0, sizeof(*right));
  838. if (mid <= slot) {
  839. /* FIXME, just alloc a new leaf here */
  840. if (leaf_space_used(l, mid, nritems - mid) + space_needed >
  841. LEAF_DATA_SIZE)
  842. BUG();
  843. } else {
  844. /* FIXME, just alloc a new leaf here */
  845. if (leaf_space_used(l, 0, mid + 1) + space_needed >
  846. LEAF_DATA_SIZE)
  847. BUG();
  848. }
  849. right->header.nritems = nritems - mid;
  850. right->header.blocknr = right_buffer->blocknr;
  851. right->header.flags = node_level(0);
  852. right->header.parentid = root->node->node.header.parentid;
  853. data_copy_size = l->items[mid].offset + l->items[mid].size -
  854. leaf_data_end(l);
  855. memcpy(right->items, l->items + mid,
  856. (nritems - mid) * sizeof(struct item));
  857. memcpy(right->data + LEAF_DATA_SIZE - data_copy_size,
  858. l->data + leaf_data_end(l), data_copy_size);
  859. rt_data_off = LEAF_DATA_SIZE -
  860. (l->items[mid].offset + l->items[mid].size);
  861. for (i = 0; i < right->header.nritems; i++)
  862. right->items[i].offset += rt_data_off;
  863. l->header.nritems = mid;
  864. ret = 0;
  865. wret = insert_ptr(root, path, &right->items[0].key,
  866. right_buffer->blocknr, path->slots[1] + 1, 1);
  867. if (wret)
  868. ret = wret;
  869. wret = write_tree_block(root, right_buffer);
  870. if (wret)
  871. ret = wret;
  872. wret = write_tree_block(root, l_buf);
  873. if (wret)
  874. ret = wret;
  875. BUG_ON(path->slots[0] != slot);
  876. if (mid <= slot) {
  877. tree_block_release(root, path->nodes[0]);
  878. path->nodes[0] = right_buffer;
  879. path->slots[0] -= mid;
  880. path->slots[1] += 1;
  881. } else
  882. tree_block_release(root, right_buffer);
  883. BUG_ON(path->slots[0] < 0);
  884. return ret;
  885. }
  886. /*
  887. * Given a key and some data, insert an item into the tree.
  888. * This does all the path init required, making room in the tree if needed.
  889. */
  890. int insert_item(struct ctree_root *root, struct key *key,
  891. void *data, int data_size)
  892. {
  893. int ret = 0;
  894. int wret;
  895. int slot;
  896. int slot_orig;
  897. struct leaf *leaf;
  898. struct tree_buffer *leaf_buf;
  899. unsigned int nritems;
  900. unsigned int data_end;
  901. struct ctree_path path;
  902. /* create a root if there isn't one */
  903. if (!root->node)
  904. BUG();
  905. init_path(&path);
  906. ret = search_slot(root, key, &path, data_size);
  907. if (ret == 0) {
  908. release_path(root, &path);
  909. return -EEXIST;
  910. }
  911. if (ret < 0) {
  912. release_path(root, &path);
  913. return ret;
  914. }
  915. slot_orig = path.slots[0];
  916. leaf_buf = path.nodes[0];
  917. leaf = &leaf_buf->leaf;
  918. nritems = leaf->header.nritems;
  919. data_end = leaf_data_end(leaf);
  920. if (leaf_free_space(leaf) < sizeof(struct item) + data_size)
  921. BUG();
  922. slot = path.slots[0];
  923. BUG_ON(slot < 0);
  924. if (slot != nritems) {
  925. int i;
  926. unsigned int old_data = leaf->items[slot].offset +
  927. leaf->items[slot].size;
  928. /*
  929. * item0..itemN ... dataN.offset..dataN.size .. data0.size
  930. */
  931. /* first correct the data pointers */
  932. for (i = slot; i < nritems; i++)
  933. leaf->items[i].offset -= data_size;
  934. /* shift the items */
  935. memmove(leaf->items + slot + 1, leaf->items + slot,
  936. (nritems - slot) * sizeof(struct item));
  937. /* shift the data */
  938. memmove(leaf->data + data_end - data_size, leaf->data +
  939. data_end, old_data - data_end);
  940. data_end = old_data;
  941. }
  942. /* copy the new data in */
  943. memcpy(&leaf->items[slot].key, key, sizeof(struct key));
  944. leaf->items[slot].offset = data_end - data_size;
  945. leaf->items[slot].size = data_size;
  946. memcpy(leaf->data + data_end - data_size, data, data_size);
  947. leaf->header.nritems += 1;
  948. ret = 0;
  949. if (slot == 0)
  950. ret = fixup_low_keys(root, &path, key, 1);
  951. wret = write_tree_block(root, leaf_buf);
  952. if (wret)
  953. ret = wret;
  954. if (leaf_free_space(leaf) < 0)
  955. BUG();
  956. check_leaf(&path, 0);
  957. release_path(root, &path);
  958. return ret;
  959. }
  960. /*
  961. * delete the pointer from a given node.
  962. *
  963. * If the delete empties a node, the node is removed from the tree,
  964. * continuing all the way the root if required. The root is converted into
  965. * a leaf if all the nodes are emptied.
  966. */
  967. static int del_ptr(struct ctree_root *root, struct ctree_path *path, int level,
  968. int slot)
  969. {
  970. struct node *node;
  971. struct tree_buffer *parent = path->nodes[level];
  972. int nritems;
  973. int ret = 0;
  974. int wret;
  975. node = &parent->node;
  976. nritems = node->header.nritems;
  977. if (slot != nritems -1) {
  978. memmove(node->keys + slot, node->keys + slot + 1,
  979. sizeof(struct key) * (nritems - slot - 1));
  980. memmove(node->blockptrs + slot,
  981. node->blockptrs + slot + 1,
  982. sizeof(u64) * (nritems - slot - 1));
  983. }
  984. node->header.nritems--;
  985. if (node->header.nritems == 0 && parent == root->node) {
  986. BUG_ON(node_level(root->node->node.header.flags) != 1);
  987. /* just turn the root into a leaf and break */
  988. root->node->node.header.flags = node_level(0);
  989. } else if (slot == 0) {
  990. wret = fixup_low_keys(root, path, node->keys, level + 1);
  991. if (wret)
  992. ret = wret;
  993. }
  994. wret = write_tree_block(root, parent);
  995. if (wret)
  996. ret = wret;
  997. return ret;
  998. }
  999. /*
  1000. * delete the item at the leaf level in path. If that empties
  1001. * the leaf, remove it from the tree
  1002. */
  1003. int del_item(struct ctree_root *root, struct ctree_path *path)
  1004. {
  1005. int slot;
  1006. struct leaf *leaf;
  1007. struct tree_buffer *leaf_buf;
  1008. int doff;
  1009. int dsize;
  1010. int ret = 0;
  1011. int wret;
  1012. leaf_buf = path->nodes[0];
  1013. leaf = &leaf_buf->leaf;
  1014. slot = path->slots[0];
  1015. doff = leaf->items[slot].offset;
  1016. dsize = leaf->items[slot].size;
  1017. if (slot != leaf->header.nritems - 1) {
  1018. int i;
  1019. int data_end = leaf_data_end(leaf);
  1020. memmove(leaf->data + data_end + dsize,
  1021. leaf->data + data_end,
  1022. doff - data_end);
  1023. for (i = slot + 1; i < leaf->header.nritems; i++)
  1024. leaf->items[i].offset += dsize;
  1025. memmove(leaf->items + slot, leaf->items + slot + 1,
  1026. sizeof(struct item) *
  1027. (leaf->header.nritems - slot - 1));
  1028. }
  1029. leaf->header.nritems -= 1;
  1030. /* delete the leaf if we've emptied it */
  1031. if (leaf->header.nritems == 0) {
  1032. if (leaf_buf == root->node) {
  1033. leaf->header.flags = node_level(0);
  1034. write_tree_block(root, leaf_buf);
  1035. } else {
  1036. wret = del_ptr(root, path, 1, path->slots[1]);
  1037. if (wret)
  1038. ret = wret;
  1039. wret = free_extent(root, leaf_buf->blocknr, 1);
  1040. if (wret)
  1041. ret = wret;
  1042. }
  1043. } else {
  1044. int used = leaf_space_used(leaf, 0, leaf->header.nritems);
  1045. if (slot == 0) {
  1046. wret = fixup_low_keys(root, path,
  1047. &leaf->items[0].key, 1);
  1048. if (wret)
  1049. ret = wret;
  1050. }
  1051. wret = write_tree_block(root, leaf_buf);
  1052. if (wret)
  1053. ret = wret;
  1054. /* delete the leaf if it is mostly empty */
  1055. if (used < LEAF_DATA_SIZE / 3) {
  1056. /* push_leaf_left fixes the path.
  1057. * make sure the path still points to our leaf
  1058. * for possible call to del_ptr below
  1059. */
  1060. slot = path->slots[1];
  1061. leaf_buf->count++;
  1062. wret = push_leaf_left(root, path, 1);
  1063. if (wret < 0)
  1064. ret = wret;
  1065. if (leaf->header.nritems) {
  1066. wret = push_leaf_right(root, path, 1);
  1067. if (wret < 0)
  1068. ret = wret;
  1069. }
  1070. if (leaf->header.nritems == 0) {
  1071. u64 blocknr = leaf_buf->blocknr;
  1072. wret = del_ptr(root, path, 1, slot);
  1073. if (wret)
  1074. ret = wret;
  1075. tree_block_release(root, leaf_buf);
  1076. wret = free_extent(root, blocknr, 1);
  1077. if (wret)
  1078. ret = wret;
  1079. } else {
  1080. tree_block_release(root, leaf_buf);
  1081. }
  1082. }
  1083. }
  1084. return ret;
  1085. }
  1086. /*
  1087. * walk up the tree as far as required to find the next leaf.
  1088. * returns 0 if it found something or 1 if there are no greater leaves.
  1089. * returns < 0 on io errors.
  1090. */
  1091. int next_leaf(struct ctree_root *root, struct ctree_path *path)
  1092. {
  1093. int slot;
  1094. int level = 1;
  1095. u64 blocknr;
  1096. struct tree_buffer *c;
  1097. struct tree_buffer *next = NULL;
  1098. while(level < MAX_LEVEL) {
  1099. if (!path->nodes[level])
  1100. return 1;
  1101. slot = path->slots[level] + 1;
  1102. c = path->nodes[level];
  1103. if (slot >= c->node.header.nritems) {
  1104. level++;
  1105. continue;
  1106. }
  1107. blocknr = c->node.blockptrs[slot];
  1108. if (next)
  1109. tree_block_release(root, next);
  1110. next = read_tree_block(root, blocknr);
  1111. break;
  1112. }
  1113. path->slots[level] = slot;
  1114. while(1) {
  1115. level--;
  1116. c = path->nodes[level];
  1117. tree_block_release(root, c);
  1118. path->nodes[level] = next;
  1119. path->slots[level] = 0;
  1120. if (!level)
  1121. break;
  1122. next = read_tree_block(root, next->node.blockptrs[0]);
  1123. }
  1124. return 0;
  1125. }