ctree.c 28 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085
  1. #include <stdio.h>
  2. #include <stdlib.h>
  3. #include "kerncompat.h"
  4. #include "radix-tree.h"
  5. #include "ctree.h"
  6. #include "disk-io.h"
  7. static inline void init_path(struct ctree_path *p)
  8. {
  9. memset(p, 0, sizeof(*p));
  10. }
  11. static void release_path(struct ctree_root *root, struct ctree_path *p)
  12. {
  13. int i;
  14. for (i = 0; i < MAX_LEVEL; i++) {
  15. if (!p->nodes[i])
  16. break;
  17. tree_block_release(root, p->nodes[i]);
  18. }
  19. }
  20. /*
  21. * The leaf data grows from end-to-front in the node.
  22. * this returns the address of the start of the last item,
  23. * which is the stop of the leaf data stack
  24. */
  25. static inline unsigned int leaf_data_end(struct leaf *leaf)
  26. {
  27. unsigned int nr = leaf->header.nritems;
  28. if (nr == 0)
  29. return ARRAY_SIZE(leaf->data);
  30. return leaf->items[nr-1].offset;
  31. }
  32. /*
  33. * The space between the end of the leaf items and
  34. * the start of the leaf data. IOW, how much room
  35. * the leaf has left for both items and data
  36. */
  37. static inline int leaf_free_space(struct leaf *leaf)
  38. {
  39. int data_end = leaf_data_end(leaf);
  40. int nritems = leaf->header.nritems;
  41. char *items_end = (char *)(leaf->items + nritems + 1);
  42. return (char *)(leaf->data + data_end) - (char *)items_end;
  43. }
  44. /*
  45. * compare two keys in a memcmp fashion
  46. */
  47. int comp_keys(struct key *k1, struct key *k2)
  48. {
  49. if (k1->objectid > k2->objectid)
  50. return 1;
  51. if (k1->objectid < k2->objectid)
  52. return -1;
  53. if (k1->flags > k2->flags)
  54. return 1;
  55. if (k1->flags < k2->flags)
  56. return -1;
  57. if (k1->offset > k2->offset)
  58. return 1;
  59. if (k1->offset < k2->offset)
  60. return -1;
  61. return 0;
  62. }
  63. /*
  64. * search for key in the array p. items p are item_size apart
  65. * and there are 'max' items in p
  66. * the slot in the array is returned via slot, and it points to
  67. * the place where you would insert key if it is not found in
  68. * the array.
  69. *
  70. * slot may point to max if the key is bigger than all of the keys
  71. */
  72. int generic_bin_search(char *p, int item_size, struct key *key,
  73. int max, int *slot)
  74. {
  75. int low = 0;
  76. int high = max;
  77. int mid;
  78. int ret;
  79. struct key *tmp;
  80. while(low < high) {
  81. mid = (low + high) / 2;
  82. tmp = (struct key *)(p + mid * item_size);
  83. ret = comp_keys(tmp, key);
  84. if (ret < 0)
  85. low = mid + 1;
  86. else if (ret > 0)
  87. high = mid;
  88. else {
  89. *slot = mid;
  90. return 0;
  91. }
  92. }
  93. *slot = low;
  94. return 1;
  95. }
  96. int bin_search(struct node *c, struct key *key, int *slot)
  97. {
  98. if (is_leaf(c->header.flags)) {
  99. struct leaf *l = (struct leaf *)c;
  100. return generic_bin_search((void *)l->items, sizeof(struct item),
  101. key, c->header.nritems, slot);
  102. } else {
  103. return generic_bin_search((void *)c->keys, sizeof(struct key),
  104. key, c->header.nritems, slot);
  105. }
  106. return -1;
  107. }
  108. /*
  109. * look for key in the tree. path is filled in with nodes along the way
  110. * if key is found, we return zero and you can find the item in the leaf
  111. * level of the path (level 0)
  112. *
  113. * If the key isn't found, the path points to the slot where it should
  114. * be inserted.
  115. */
  116. int search_slot(struct ctree_root *root, struct key *key, struct ctree_path *p)
  117. {
  118. struct tree_buffer *b = root->node;
  119. struct node *c;
  120. int slot;
  121. int ret;
  122. int level;
  123. b->count++;
  124. while (b) {
  125. c = &b->node;
  126. level = node_level(c->header.flags);
  127. p->nodes[level] = b;
  128. ret = bin_search(c, key, &slot);
  129. if (!is_leaf(c->header.flags)) {
  130. if (ret && slot > 0)
  131. slot -= 1;
  132. p->slots[level] = slot;
  133. b = read_tree_block(root, c->blockptrs[slot]);
  134. continue;
  135. } else {
  136. p->slots[level] = slot;
  137. return ret;
  138. }
  139. }
  140. return -1;
  141. }
  142. /*
  143. * adjust the pointers going up the tree, starting at level
  144. * making sure the right key of each node is points to 'key'.
  145. * This is used after shifting pointers to the left, so it stops
  146. * fixing up pointers when a given leaf/node is not in slot 0 of the
  147. * higher levels
  148. */
  149. static void fixup_low_keys(struct ctree_root *root,
  150. struct ctree_path *path, struct key *key,
  151. int level)
  152. {
  153. int i;
  154. for (i = level; i < MAX_LEVEL; i++) {
  155. struct node *t;
  156. int tslot = path->slots[i];
  157. if (!path->nodes[i])
  158. break;
  159. t = &path->nodes[i]->node;
  160. memcpy(t->keys + tslot, key, sizeof(*key));
  161. write_tree_block(root, path->nodes[i]);
  162. if (tslot != 0)
  163. break;
  164. }
  165. }
  166. /*
  167. * try to push data from one node into the next node left in the
  168. * tree. The src node is found at specified level in the path.
  169. * If some bytes were pushed, return 0, otherwise return 1.
  170. *
  171. * Lower nodes/leaves in the path are not touched, higher nodes may
  172. * be modified to reflect the push.
  173. *
  174. * The path is altered to reflect the push.
  175. */
  176. int push_node_left(struct ctree_root *root, struct ctree_path *path, int level)
  177. {
  178. int slot;
  179. struct node *left;
  180. struct node *right;
  181. int push_items = 0;
  182. int left_nritems;
  183. int right_nritems;
  184. struct tree_buffer *t;
  185. struct tree_buffer *right_buf;
  186. if (level == MAX_LEVEL - 1 || path->nodes[level + 1] == 0)
  187. return 1;
  188. slot = path->slots[level + 1];
  189. if (slot == 0)
  190. return 1;
  191. t = read_tree_block(root,
  192. path->nodes[level + 1]->node.blockptrs[slot - 1]);
  193. left = &t->node;
  194. right_buf = path->nodes[level];
  195. right = &right_buf->node;
  196. left_nritems = left->header.nritems;
  197. right_nritems = right->header.nritems;
  198. push_items = NODEPTRS_PER_BLOCK - (left_nritems + 1);
  199. if (push_items <= 0) {
  200. tree_block_release(root, t);
  201. return 1;
  202. }
  203. if (right_nritems < push_items)
  204. push_items = right_nritems;
  205. memcpy(left->keys + left_nritems, right->keys,
  206. push_items * sizeof(struct key));
  207. memcpy(left->blockptrs + left_nritems, right->blockptrs,
  208. push_items * sizeof(u64));
  209. memmove(right->keys, right->keys + push_items,
  210. (right_nritems - push_items) * sizeof(struct key));
  211. memmove(right->blockptrs, right->blockptrs + push_items,
  212. (right_nritems - push_items) * sizeof(u64));
  213. right->header.nritems -= push_items;
  214. left->header.nritems += push_items;
  215. /* adjust the pointers going up the tree */
  216. fixup_low_keys(root, path, right->keys, level + 1);
  217. write_tree_block(root, t);
  218. write_tree_block(root, right_buf);
  219. /* then fixup the leaf pointer in the path */
  220. if (path->slots[level] < push_items) {
  221. path->slots[level] += left_nritems;
  222. tree_block_release(root, path->nodes[level]);
  223. path->nodes[level] = t;
  224. path->slots[level + 1] -= 1;
  225. } else {
  226. path->slots[level] -= push_items;
  227. tree_block_release(root, t);
  228. }
  229. return 0;
  230. }
  231. /*
  232. * try to push data from one node into the next node right in the
  233. * tree. The src node is found at specified level in the path.
  234. * If some bytes were pushed, return 0, otherwise return 1.
  235. *
  236. * Lower nodes/leaves in the path are not touched, higher nodes may
  237. * be modified to reflect the push.
  238. *
  239. * The path is altered to reflect the push.
  240. */
  241. int push_node_right(struct ctree_root *root, struct ctree_path *path, int level)
  242. {
  243. int slot;
  244. struct tree_buffer *t;
  245. struct tree_buffer *src_buffer;
  246. struct node *dst;
  247. struct node *src;
  248. int push_items = 0;
  249. int dst_nritems;
  250. int src_nritems;
  251. /* can't push from the root */
  252. if (level == MAX_LEVEL - 1 || path->nodes[level + 1] == 0)
  253. return 1;
  254. /* only try to push inside the node higher up */
  255. slot = path->slots[level + 1];
  256. if (slot == NODEPTRS_PER_BLOCK - 1)
  257. return 1;
  258. if (slot >= path->nodes[level + 1]->node.header.nritems -1)
  259. return 1;
  260. t = read_tree_block(root,
  261. path->nodes[level + 1]->node.blockptrs[slot + 1]);
  262. dst = &t->node;
  263. src_buffer = path->nodes[level];
  264. src = &src_buffer->node;
  265. dst_nritems = dst->header.nritems;
  266. src_nritems = src->header.nritems;
  267. push_items = NODEPTRS_PER_BLOCK - (dst_nritems + 1);
  268. if (push_items <= 0) {
  269. tree_block_release(root, t);
  270. return 1;
  271. }
  272. if (src_nritems < push_items)
  273. push_items = src_nritems;
  274. memmove(dst->keys + push_items, dst->keys,
  275. dst_nritems * sizeof(struct key));
  276. memcpy(dst->keys, src->keys + src_nritems - push_items,
  277. push_items * sizeof(struct key));
  278. memmove(dst->blockptrs + push_items, dst->blockptrs,
  279. dst_nritems * sizeof(u64));
  280. memcpy(dst->blockptrs, src->blockptrs + src_nritems - push_items,
  281. push_items * sizeof(u64));
  282. src->header.nritems -= push_items;
  283. dst->header.nritems += push_items;
  284. /* adjust the pointers going up the tree */
  285. memcpy(path->nodes[level + 1]->node.keys + path->slots[level + 1] + 1,
  286. dst->keys, sizeof(struct key));
  287. write_tree_block(root, path->nodes[level + 1]);
  288. write_tree_block(root, t);
  289. write_tree_block(root, src_buffer);
  290. /* then fixup the pointers in the path */
  291. if (path->slots[level] >= src->header.nritems) {
  292. path->slots[level] -= src->header.nritems;
  293. tree_block_release(root, path->nodes[level]);
  294. path->nodes[level] = t;
  295. path->slots[level + 1] += 1;
  296. } else {
  297. tree_block_release(root, t);
  298. }
  299. return 0;
  300. }
  301. /*
  302. * worker function to insert a single pointer in a node.
  303. * the node should have enough room for the pointer already
  304. * slot and level indicate where you want the key to go, and
  305. * blocknr is the block the key points to.
  306. */
  307. int __insert_ptr(struct ctree_root *root,
  308. struct ctree_path *path, struct key *key,
  309. u64 blocknr, int slot, int level)
  310. {
  311. struct node *c;
  312. struct node *lower;
  313. struct key *lower_key;
  314. int nritems;
  315. /* need a new root */
  316. if (!path->nodes[level]) {
  317. struct tree_buffer *t;
  318. t = alloc_free_block(root);
  319. c = &t->node;
  320. memset(c, 0, sizeof(c));
  321. c->header.nritems = 2;
  322. c->header.flags = node_level(level);
  323. c->header.blocknr = t->blocknr;
  324. lower = &path->nodes[level-1]->node;
  325. if (is_leaf(lower->header.flags))
  326. lower_key = &((struct leaf *)lower)->items[0].key;
  327. else
  328. lower_key = lower->keys;
  329. memcpy(c->keys, lower_key, sizeof(struct key));
  330. memcpy(c->keys + 1, key, sizeof(struct key));
  331. c->blockptrs[0] = path->nodes[level-1]->blocknr;
  332. c->blockptrs[1] = blocknr;
  333. /* the path has an extra ref to root->node */
  334. tree_block_release(root, root->node);
  335. root->node = t;
  336. t->count++;
  337. write_tree_block(root, t);
  338. path->nodes[level] = t;
  339. path->slots[level] = 0;
  340. if (c->keys[1].objectid == 0)
  341. BUG();
  342. return 0;
  343. }
  344. lower = &path->nodes[level]->node;
  345. nritems = lower->header.nritems;
  346. if (slot > nritems)
  347. BUG();
  348. if (nritems == NODEPTRS_PER_BLOCK)
  349. BUG();
  350. if (slot != nritems) {
  351. memmove(lower->keys + slot + 1, lower->keys + slot,
  352. (nritems - slot) * sizeof(struct key));
  353. memmove(lower->blockptrs + slot + 1, lower->blockptrs + slot,
  354. (nritems - slot) * sizeof(u64));
  355. }
  356. memcpy(lower->keys + slot, key, sizeof(struct key));
  357. lower->blockptrs[slot] = blocknr;
  358. lower->header.nritems++;
  359. if (lower->keys[1].objectid == 0)
  360. BUG();
  361. write_tree_block(root, path->nodes[level]);
  362. return 0;
  363. }
  364. /*
  365. * insert a key,blocknr pair into the tree at a given level
  366. * If the node at that level in the path doesn't have room,
  367. * it is split or shifted as appropriate.
  368. */
  369. int insert_ptr(struct ctree_root *root,
  370. struct ctree_path *path, struct key *key,
  371. u64 blocknr, int level)
  372. {
  373. struct tree_buffer *t = path->nodes[level];
  374. struct node *c = &path->nodes[level]->node;
  375. struct node *b;
  376. struct tree_buffer *b_buffer;
  377. struct tree_buffer *bal[MAX_LEVEL];
  378. int bal_level = level;
  379. int mid;
  380. int bal_start = -1;
  381. /*
  382. * check to see if we need to make room in the node for this
  383. * pointer. If we do, keep walking the tree, making sure there
  384. * is enough room in each level for the required insertions.
  385. *
  386. * The bal array is filled in with any nodes to be inserted
  387. * due to splitting. Once we've done all the splitting required
  388. * do the inserts based on the data in the bal array.
  389. */
  390. memset(bal, 0, ARRAY_SIZE(bal));
  391. while(t && t->node.header.nritems == NODEPTRS_PER_BLOCK) {
  392. c = &t->node;
  393. if (push_node_left(root, path,
  394. node_level(c->header.flags)) == 0)
  395. break;
  396. if (push_node_right(root, path,
  397. node_level(c->header.flags)) == 0)
  398. break;
  399. bal_start = bal_level;
  400. if (bal_level == MAX_LEVEL - 1)
  401. BUG();
  402. b_buffer = alloc_free_block(root);
  403. b = &b_buffer->node;
  404. b->header.flags = c->header.flags;
  405. b->header.blocknr = b_buffer->blocknr;
  406. mid = (c->header.nritems + 1) / 2;
  407. memcpy(b->keys, c->keys + mid,
  408. (c->header.nritems - mid) * sizeof(struct key));
  409. memcpy(b->blockptrs, c->blockptrs + mid,
  410. (c->header.nritems - mid) * sizeof(u64));
  411. b->header.nritems = c->header.nritems - mid;
  412. c->header.nritems = mid;
  413. write_tree_block(root, t);
  414. write_tree_block(root, b_buffer);
  415. bal[bal_level] = b_buffer;
  416. if (bal_level == MAX_LEVEL - 1)
  417. break;
  418. bal_level += 1;
  419. t = path->nodes[bal_level];
  420. }
  421. /*
  422. * bal_start tells us the first level in the tree that needed to
  423. * be split. Go through the bal array inserting the new nodes
  424. * as needed. The path is fixed as we go.
  425. */
  426. while(bal_start > 0) {
  427. b_buffer = bal[bal_start];
  428. c = &path->nodes[bal_start]->node;
  429. __insert_ptr(root, path, b_buffer->node.keys, b_buffer->blocknr,
  430. path->slots[bal_start + 1] + 1, bal_start + 1);
  431. if (path->slots[bal_start] >= c->header.nritems) {
  432. path->slots[bal_start] -= c->header.nritems;
  433. tree_block_release(root, path->nodes[bal_start]);
  434. path->nodes[bal_start] = b_buffer;
  435. path->slots[bal_start + 1] += 1;
  436. } else {
  437. tree_block_release(root, b_buffer);
  438. }
  439. bal_start--;
  440. if (!bal[bal_start])
  441. break;
  442. }
  443. /* Now that the tree has room, insert the requested pointer */
  444. return __insert_ptr(root, path, key, blocknr, path->slots[level] + 1,
  445. level);
  446. }
  447. /*
  448. * how many bytes are required to store the items in a leaf. start
  449. * and nr indicate which items in the leaf to check. This totals up the
  450. * space used both by the item structs and the item data
  451. */
  452. int leaf_space_used(struct leaf *l, int start, int nr)
  453. {
  454. int data_len;
  455. int end = start + nr - 1;
  456. if (!nr)
  457. return 0;
  458. data_len = l->items[start].offset + l->items[start].size;
  459. data_len = data_len - l->items[end].offset;
  460. data_len += sizeof(struct item) * nr;
  461. return data_len;
  462. }
  463. /*
  464. * push some data in the path leaf to the left, trying to free up at
  465. * least data_size bytes. returns zero if the push worked, nonzero otherwise
  466. */
  467. int push_leaf_left(struct ctree_root *root, struct ctree_path *path,
  468. int data_size)
  469. {
  470. struct tree_buffer *right_buf = path->nodes[0];
  471. struct leaf *right = &right_buf->leaf;
  472. struct tree_buffer *t;
  473. struct leaf *left;
  474. int slot;
  475. int i;
  476. int free_space;
  477. int push_space = 0;
  478. int push_items = 0;
  479. struct item *item;
  480. int old_left_nritems;
  481. slot = path->slots[1];
  482. if (slot == 0) {
  483. return 1;
  484. }
  485. if (!path->nodes[1]) {
  486. return 1;
  487. }
  488. t = read_tree_block(root, path->nodes[1]->node.blockptrs[slot - 1]);
  489. left = &t->leaf;
  490. free_space = leaf_free_space(left);
  491. if (free_space < data_size + sizeof(struct item)) {
  492. tree_block_release(root, t);
  493. return 1;
  494. }
  495. for (i = 0; i < right->header.nritems; i++) {
  496. item = right->items + i;
  497. if (path->slots[0] == i)
  498. push_space += data_size + sizeof(*item);
  499. if (item->size + sizeof(*item) + push_space > free_space)
  500. break;
  501. push_items++;
  502. push_space += item->size + sizeof(*item);
  503. }
  504. if (push_items == 0) {
  505. tree_block_release(root, t);
  506. return 1;
  507. }
  508. /* push data from right to left */
  509. memcpy(left->items + left->header.nritems,
  510. right->items, push_items * sizeof(struct item));
  511. push_space = LEAF_DATA_SIZE - right->items[push_items -1].offset;
  512. memcpy(left->data + leaf_data_end(left) - push_space,
  513. right->data + right->items[push_items - 1].offset,
  514. push_space);
  515. old_left_nritems = left->header.nritems;
  516. BUG_ON(old_left_nritems < 0);
  517. for(i = old_left_nritems; i < old_left_nritems + push_items; i++) {
  518. left->items[i].offset -= LEAF_DATA_SIZE -
  519. left->items[old_left_nritems -1].offset;
  520. }
  521. left->header.nritems += push_items;
  522. /* fixup right node */
  523. push_space = right->items[push_items-1].offset - leaf_data_end(right);
  524. memmove(right->data + LEAF_DATA_SIZE - push_space, right->data +
  525. leaf_data_end(right), push_space);
  526. memmove(right->items, right->items + push_items,
  527. (right->header.nritems - push_items) * sizeof(struct item));
  528. right->header.nritems -= push_items;
  529. push_space = LEAF_DATA_SIZE;
  530. for (i = 0; i < right->header.nritems; i++) {
  531. right->items[i].offset = push_space - right->items[i].size;
  532. push_space = right->items[i].offset;
  533. }
  534. write_tree_block(root, t);
  535. write_tree_block(root, right_buf);
  536. fixup_low_keys(root, path, &right->items[0].key, 1);
  537. /* then fixup the leaf pointer in the path */
  538. if (path->slots[0] < push_items) {
  539. path->slots[0] += old_left_nritems;
  540. tree_block_release(root, path->nodes[0]);
  541. path->nodes[0] = t;
  542. path->slots[1] -= 1;
  543. } else {
  544. tree_block_release(root, t);
  545. path->slots[0] -= push_items;
  546. }
  547. BUG_ON(path->slots[0] < 0);
  548. return 0;
  549. }
  550. /*
  551. * split the path's leaf in two, making sure there is at least data_size
  552. * available for the resulting leaf level of the path.
  553. */
  554. int split_leaf(struct ctree_root *root, struct ctree_path *path, int data_size)
  555. {
  556. struct tree_buffer *l_buf = path->nodes[0];
  557. struct leaf *l = &l_buf->leaf;
  558. int nritems;
  559. int mid;
  560. int slot;
  561. struct leaf *right;
  562. struct tree_buffer *right_buffer;
  563. int space_needed = data_size + sizeof(struct item);
  564. int data_copy_size;
  565. int rt_data_off;
  566. int i;
  567. int ret;
  568. if (push_leaf_left(root, path, data_size) == 0) {
  569. l_buf = path->nodes[0];
  570. l = &l_buf->leaf;
  571. if (leaf_free_space(l) >= sizeof(struct item) + data_size)
  572. return 0;
  573. }
  574. slot = path->slots[0];
  575. nritems = l->header.nritems;
  576. mid = (nritems + 1)/ 2;
  577. right_buffer = alloc_free_block(root);
  578. BUG_ON(!right_buffer);
  579. BUG_ON(mid == nritems);
  580. right = &right_buffer->leaf;
  581. memset(right, 0, sizeof(*right));
  582. if (mid <= slot) {
  583. if (leaf_space_used(l, mid, nritems - mid) + space_needed >
  584. LEAF_DATA_SIZE)
  585. BUG();
  586. } else {
  587. if (leaf_space_used(l, 0, mid + 1) + space_needed >
  588. LEAF_DATA_SIZE)
  589. BUG();
  590. }
  591. right->header.nritems = nritems - mid;
  592. right->header.blocknr = right_buffer->blocknr;
  593. right->header.flags = node_level(0);
  594. data_copy_size = l->items[mid].offset + l->items[mid].size -
  595. leaf_data_end(l);
  596. memcpy(right->items, l->items + mid,
  597. (nritems - mid) * sizeof(struct item));
  598. memcpy(right->data + LEAF_DATA_SIZE - data_copy_size,
  599. l->data + leaf_data_end(l), data_copy_size);
  600. rt_data_off = LEAF_DATA_SIZE -
  601. (l->items[mid].offset + l->items[mid].size);
  602. for (i = 0; i < right->header.nritems; i++)
  603. right->items[i].offset += rt_data_off;
  604. l->header.nritems = mid;
  605. ret = insert_ptr(root, path, &right->items[0].key,
  606. right_buffer->blocknr, 1);
  607. write_tree_block(root, right_buffer);
  608. write_tree_block(root, l_buf);
  609. BUG_ON(path->slots[0] != slot);
  610. if (mid <= slot) {
  611. tree_block_release(root, path->nodes[0]);
  612. path->nodes[0] = right_buffer;
  613. path->slots[0] -= mid;
  614. path->slots[1] += 1;
  615. } else
  616. tree_block_release(root, right_buffer);
  617. BUG_ON(path->slots[0] < 0);
  618. return ret;
  619. }
  620. /*
  621. * Given a key and some data, insert an item into the tree.
  622. * This does all the path init required, making room in the tree if needed.
  623. */
  624. int insert_item(struct ctree_root *root, struct key *key,
  625. void *data, int data_size)
  626. {
  627. int ret;
  628. int slot;
  629. int slot_orig;
  630. struct leaf *leaf;
  631. struct tree_buffer *leaf_buf;
  632. unsigned int nritems;
  633. unsigned int data_end;
  634. struct ctree_path path;
  635. /* create a root if there isn't one */
  636. if (!root->node) {
  637. struct tree_buffer *t;
  638. t = alloc_free_block(root);
  639. BUG_ON(!t);
  640. t->node.header.nritems = 0;
  641. t->node.header.flags = node_level(0);
  642. t->node.header.blocknr = t->blocknr;
  643. root->node = t;
  644. write_tree_block(root, t);
  645. }
  646. init_path(&path);
  647. ret = search_slot(root, key, &path);
  648. if (ret == 0) {
  649. release_path(root, &path);
  650. return -EEXIST;
  651. }
  652. slot_orig = path.slots[0];
  653. leaf_buf = path.nodes[0];
  654. leaf = &leaf_buf->leaf;
  655. /* make room if needed */
  656. if (leaf_free_space(leaf) < sizeof(struct item) + data_size) {
  657. split_leaf(root, &path, data_size);
  658. leaf_buf = path.nodes[0];
  659. leaf = &path.nodes[0]->leaf;
  660. }
  661. nritems = leaf->header.nritems;
  662. data_end = leaf_data_end(leaf);
  663. if (leaf_free_space(leaf) < sizeof(struct item) + data_size)
  664. BUG();
  665. slot = path.slots[0];
  666. BUG_ON(slot < 0);
  667. if (slot == 0)
  668. fixup_low_keys(root, &path, key, 1);
  669. if (slot != nritems) {
  670. int i;
  671. unsigned int old_data = leaf->items[slot].offset +
  672. leaf->items[slot].size;
  673. /*
  674. * item0..itemN ... dataN.offset..dataN.size .. data0.size
  675. */
  676. /* first correct the data pointers */
  677. for (i = slot; i < nritems; i++)
  678. leaf->items[i].offset -= data_size;
  679. /* shift the items */
  680. memmove(leaf->items + slot + 1, leaf->items + slot,
  681. (nritems - slot) * sizeof(struct item));
  682. /* shift the data */
  683. memmove(leaf->data + data_end - data_size, leaf->data +
  684. data_end, old_data - data_end);
  685. data_end = old_data;
  686. }
  687. /* copy the new data in */
  688. memcpy(&leaf->items[slot].key, key, sizeof(struct key));
  689. leaf->items[slot].offset = data_end - data_size;
  690. leaf->items[slot].size = data_size;
  691. memcpy(leaf->data + data_end - data_size, data, data_size);
  692. leaf->header.nritems += 1;
  693. write_tree_block(root, leaf_buf);
  694. if (leaf_free_space(leaf) < 0)
  695. BUG();
  696. release_path(root, &path);
  697. return 0;
  698. }
  699. /*
  700. * delete the pointer from a given level in the path. The path is not
  701. * fixed up, so after calling this it is not valid at that level.
  702. *
  703. * If the delete empties a node, the node is removed from the tree,
  704. * continuing all the way the root if required. The root is converted into
  705. * a leaf if all the nodes are emptied.
  706. */
  707. int del_ptr(struct ctree_root *root, struct ctree_path *path, int level)
  708. {
  709. int slot;
  710. struct tree_buffer *t;
  711. struct node *node;
  712. int nritems;
  713. while(1) {
  714. t = path->nodes[level];
  715. if (!t)
  716. break;
  717. node = &t->node;
  718. slot = path->slots[level];
  719. nritems = node->header.nritems;
  720. if (slot != nritems -1) {
  721. memmove(node->keys + slot, node->keys + slot + 1,
  722. sizeof(struct key) * (nritems - slot - 1));
  723. memmove(node->blockptrs + slot,
  724. node->blockptrs + slot + 1,
  725. sizeof(u64) * (nritems - slot - 1));
  726. }
  727. node->header.nritems--;
  728. write_tree_block(root, t);
  729. if (node->header.nritems != 0) {
  730. int tslot;
  731. if (slot == 0)
  732. fixup_low_keys(root, path, node->keys,
  733. level + 1);
  734. tslot = path->slots[level+1];
  735. t->count++;
  736. push_node_left(root, path, level);
  737. if (node->header.nritems) {
  738. push_node_right(root, path, level);
  739. }
  740. if (node->header.nritems) {
  741. tree_block_release(root, t);
  742. break;
  743. }
  744. tree_block_release(root, t);
  745. path->slots[level+1] = tslot;
  746. }
  747. if (t == root->node) {
  748. /* just turn the root into a leaf and break */
  749. root->node->node.header.flags = node_level(0);
  750. write_tree_block(root, t);
  751. break;
  752. }
  753. level++;
  754. if (!path->nodes[level])
  755. BUG();
  756. }
  757. return 0;
  758. }
  759. /*
  760. * delete the item at the leaf level in path. If that empties
  761. * the leaf, remove it from the tree
  762. */
  763. int del_item(struct ctree_root *root, struct ctree_path *path)
  764. {
  765. int slot;
  766. struct leaf *leaf;
  767. struct tree_buffer *leaf_buf;
  768. int doff;
  769. int dsize;
  770. leaf_buf = path->nodes[0];
  771. leaf = &leaf_buf->leaf;
  772. slot = path->slots[0];
  773. doff = leaf->items[slot].offset;
  774. dsize = leaf->items[slot].size;
  775. if (slot != leaf->header.nritems - 1) {
  776. int i;
  777. int data_end = leaf_data_end(leaf);
  778. memmove(leaf->data + data_end + dsize,
  779. leaf->data + data_end,
  780. doff - data_end);
  781. for (i = slot + 1; i < leaf->header.nritems; i++)
  782. leaf->items[i].offset += dsize;
  783. memmove(leaf->items + slot, leaf->items + slot + 1,
  784. sizeof(struct item) *
  785. (leaf->header.nritems - slot - 1));
  786. }
  787. leaf->header.nritems -= 1;
  788. /* delete the leaf if we've emptied it */
  789. if (leaf->header.nritems == 0) {
  790. if (leaf_buf == root->node) {
  791. leaf->header.flags = node_level(0);
  792. write_tree_block(root, leaf_buf);
  793. } else
  794. del_ptr(root, path, 1);
  795. } else {
  796. if (slot == 0)
  797. fixup_low_keys(root, path, &leaf->items[0].key, 1);
  798. write_tree_block(root, leaf_buf);
  799. /* delete the leaf if it is mostly empty */
  800. if (leaf_space_used(leaf, 0, leaf->header.nritems) <
  801. LEAF_DATA_SIZE / 4) {
  802. /* push_leaf_left fixes the path.
  803. * make sure the path still points to our leaf
  804. * for possible call to del_ptr below
  805. */
  806. slot = path->slots[1];
  807. leaf_buf->count++;
  808. push_leaf_left(root, path, 1);
  809. if (leaf->header.nritems == 0) {
  810. path->slots[1] = slot;
  811. del_ptr(root, path, 1);
  812. }
  813. tree_block_release(root, leaf_buf);
  814. }
  815. }
  816. return 0;
  817. }
  818. void print_leaf(struct leaf *l)
  819. {
  820. int i;
  821. int nr = l->header.nritems;
  822. struct item *item;
  823. printf("leaf %lu total ptrs %d free space %d\n", l->header.blocknr, nr,
  824. leaf_free_space(l));
  825. fflush(stdout);
  826. for (i = 0 ; i < nr ; i++) {
  827. item = l->items + i;
  828. printf("\titem %d key (%lu %u %lu) itemoff %d itemsize %d\n",
  829. i,
  830. item->key.objectid, item->key.flags, item->key.offset,
  831. item->offset, item->size);
  832. fflush(stdout);
  833. printf("\t\titem data %.*s\n", item->size, l->data+item->offset);
  834. fflush(stdout);
  835. }
  836. }
  837. void print_tree(struct ctree_root *root, struct tree_buffer *t)
  838. {
  839. int i;
  840. int nr;
  841. struct node *c;
  842. if (!t)
  843. return;
  844. c = &t->node;
  845. nr = c->header.nritems;
  846. if (c->header.blocknr != t->blocknr)
  847. BUG();
  848. if (is_leaf(c->header.flags)) {
  849. print_leaf((struct leaf *)c);
  850. return;
  851. }
  852. printf("node %lu level %d total ptrs %d free spc %lu\n", t->blocknr,
  853. node_level(c->header.flags), c->header.nritems,
  854. NODEPTRS_PER_BLOCK - c->header.nritems);
  855. fflush(stdout);
  856. for (i = 0; i < nr; i++) {
  857. printf("\tkey %d (%lu %u %lu) block %lu\n",
  858. i,
  859. c->keys[i].objectid, c->keys[i].flags, c->keys[i].offset,
  860. c->blockptrs[i]);
  861. fflush(stdout);
  862. }
  863. for (i = 0; i < nr; i++) {
  864. struct tree_buffer *next_buf = read_tree_block(root,
  865. c->blockptrs[i]);
  866. struct node *next = &next_buf->node;
  867. if (is_leaf(next->header.flags) &&
  868. node_level(c->header.flags) != 1)
  869. BUG();
  870. if (node_level(next->header.flags) !=
  871. node_level(c->header.flags) - 1)
  872. BUG();
  873. print_tree(root, next_buf);
  874. tree_block_release(root, next_buf);
  875. }
  876. }
  877. /* for testing only */
  878. int next_key(int i, int max_key) {
  879. return rand() % max_key;
  880. // return i;
  881. }
  882. int main() {
  883. struct ctree_root *root;
  884. struct key ins;
  885. struct key last = { (u64)-1, 0, 0};
  886. char *buf;
  887. int i;
  888. int num;
  889. int ret;
  890. int run_size = 25000;
  891. int max_key = 100000000;
  892. int tree_size = 0;
  893. struct ctree_path path;
  894. radix_tree_init();
  895. root = open_ctree("dbfile");
  896. srand(55);
  897. for (i = 0; i < run_size; i++) {
  898. buf = malloc(64);
  899. num = next_key(i, max_key);
  900. // num = i;
  901. sprintf(buf, "string-%d", num);
  902. // printf("insert %d\n", num);
  903. ins.objectid = num;
  904. ins.offset = 0;
  905. ins.flags = 0;
  906. ret = insert_item(root, &ins, buf, strlen(buf));
  907. if (!ret)
  908. tree_size++;
  909. }
  910. close_ctree(root);
  911. root = open_ctree("dbfile");
  912. printf("starting search\n");
  913. srand(55);
  914. for (i = 0; i < run_size; i++) {
  915. num = next_key(i, max_key);
  916. ins.objectid = num;
  917. init_path(&path);
  918. ret = search_slot(root, &ins, &path);
  919. if (ret) {
  920. print_tree(root, root->node);
  921. printf("unable to find %d\n", num);
  922. exit(1);
  923. }
  924. release_path(root, &path);
  925. }
  926. close_ctree(root);
  927. root = open_ctree("dbfile");
  928. printf("node %p level %d total ptrs %d free spc %lu\n", root->node,
  929. node_level(root->node->node.header.flags),
  930. root->node->node.header.nritems,
  931. NODEPTRS_PER_BLOCK - root->node->node.header.nritems);
  932. printf("all searches good, deleting some items\n");
  933. i = 0;
  934. srand(55);
  935. for (i = 0 ; i < run_size/4; i++) {
  936. num = next_key(i, max_key);
  937. ins.objectid = num;
  938. init_path(&path);
  939. ret = search_slot(root, &ins, &path);
  940. if (ret)
  941. continue;
  942. ret = del_item(root, &path);
  943. if (ret != 0)
  944. BUG();
  945. release_path(root, &path);
  946. tree_size--;
  947. }
  948. srand(128);
  949. for (i = 0; i < run_size; i++) {
  950. buf = malloc(64);
  951. num = next_key(i, max_key);
  952. sprintf(buf, "string-%d", num);
  953. ins.objectid = num;
  954. ret = insert_item(root, &ins, buf, strlen(buf));
  955. if (!ret)
  956. tree_size++;
  957. }
  958. close_ctree(root);
  959. root = open_ctree("dbfile");
  960. printf("starting search2\n");
  961. srand(128);
  962. for (i = 0; i < run_size; i++) {
  963. num = next_key(i, max_key);
  964. ins.objectid = num;
  965. init_path(&path);
  966. ret = search_slot(root, &ins, &path);
  967. if (ret) {
  968. print_tree(root, root->node);
  969. printf("unable to find %d\n", num);
  970. exit(1);
  971. }
  972. release_path(root, &path);
  973. }
  974. printf("starting big long delete run\n");
  975. while(root->node && root->node->node.header.nritems > 0) {
  976. struct leaf *leaf;
  977. int slot;
  978. ins.objectid = (u64)-1;
  979. init_path(&path);
  980. ret = search_slot(root, &ins, &path);
  981. if (ret == 0)
  982. BUG();
  983. leaf = &path.nodes[0]->leaf;
  984. slot = path.slots[0];
  985. if (slot != leaf->header.nritems)
  986. BUG();
  987. while(path.slots[0] > 0) {
  988. path.slots[0] -= 1;
  989. slot = path.slots[0];
  990. leaf = &path.nodes[0]->leaf;
  991. if (comp_keys(&last, &leaf->items[slot].key) <= 0)
  992. BUG();
  993. memcpy(&last, &leaf->items[slot].key, sizeof(last));
  994. ret = del_item(root, &path);
  995. if (ret != 0) {
  996. printf("del_item returned %d\n", ret);
  997. BUG();
  998. }
  999. tree_size--;
  1000. }
  1001. release_path(root, &path);
  1002. }
  1003. close_ctree(root);
  1004. printf("tree size is now %d\n", tree_size);
  1005. return 0;
  1006. }