ctree.c 33 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
7127812791280128112821283128412851286128712881289129012911292129312941295129612971298129913001301
  1. #include <stdio.h>
  2. #include <stdlib.h>
  3. #include "kerncompat.h"
  4. #include "radix-tree.h"
  5. #include "ctree.h"
  6. #include "disk-io.h"
  7. #include "print-tree.h"
  /*
   * Forward declarations of file-local (static) helpers, so that they can
   * be called by functions that appear earlier in the file than their
   * definitions.
   */
  8. static int split_node(struct ctree_root *root, struct ctree_path *path,
  9. int level);
  10. static int split_leaf(struct ctree_root *root, struct ctree_path *path,
  11. int data_size);
  12. static int push_node_left(struct ctree_root *root, struct tree_buffer *dst,
  13. struct tree_buffer *src);
  14. static int balance_node_right(struct ctree_root *root,
  15. struct tree_buffer *dst_buf,
  16. struct tree_buffer *src_buf);
  17. static int del_ptr(struct ctree_root *root, struct ctree_path *path, int level,
  18. int slot);
  19. inline void init_path(struct ctree_path *p)
  20. {
  21. memset(p, 0, sizeof(*p));
  22. }
  23. void release_path(struct ctree_root *root, struct ctree_path *p)
  24. {
  25. int i;
  26. for (i = 0; i < MAX_LEVEL; i++) {
  27. if (!p->nodes[i])
  28. break;
  29. tree_block_release(root, p->nodes[i]);
  30. }
  31. memset(p, 0, sizeof(*p));
  32. }
  33. /*
  34. * The leaf data grows from end-to-front in the node.
  35. * this returns the address of the start of the last item,
  36. * which is the stop of the leaf data stack
  37. */
  38. static inline unsigned int leaf_data_end(struct leaf *leaf)
  39. {
  40. unsigned int nr = leaf->header.nritems;
  41. if (nr == 0)
  42. return sizeof(leaf->data);
  43. return leaf->items[nr-1].offset;
  44. }
  45. /*
  46. * The space between the end of the leaf items and
  47. * the start of the leaf data. IOW, how much room
  48. * the leaf has left for both items and data
  49. */
  50. int leaf_free_space(struct leaf *leaf)
  51. {
  52. int data_end = leaf_data_end(leaf);
  53. int nritems = leaf->header.nritems;
  54. char *items_end = (char *)(leaf->items + nritems + 1);
  55. return (char *)(leaf->data + data_end) - (char *)items_end;
  56. }
  57. /*
  58. * compare two keys in a memcmp fashion
  59. */
  60. int comp_keys(struct key *k1, struct key *k2)
  61. {
  62. if (k1->objectid > k2->objectid)
  63. return 1;
  64. if (k1->objectid < k2->objectid)
  65. return -1;
  66. if (k1->flags > k2->flags)
  67. return 1;
  68. if (k1->flags < k2->flags)
  69. return -1;
  70. if (k1->offset > k2->offset)
  71. return 1;
  72. if (k1->offset < k2->offset)
  73. return -1;
  74. return 0;
  75. }
  76. int check_node(struct ctree_path *path, int level)
  77. {
  78. int i;
  79. struct node *parent = NULL;
  80. struct node *node = &path->nodes[level]->node;
  81. int parent_slot;
  82. if (path->nodes[level + 1])
  83. parent = &path->nodes[level + 1]->node;
  84. parent_slot = path->slots[level + 1];
  85. if (parent && node->header.nritems > 0) {
  86. struct key *parent_key;
  87. parent_key = &parent->keys[parent_slot];
  88. BUG_ON(memcmp(parent_key, node->keys, sizeof(struct key)));
  89. BUG_ON(parent->blockptrs[parent_slot] != node->header.blocknr);
  90. }
  91. BUG_ON(node->header.nritems > NODEPTRS_PER_BLOCK);
  92. for (i = 0; i < node->header.nritems - 2; i++) {
  93. BUG_ON(comp_keys(&node->keys[i], &node->keys[i+1]) >= 0);
  94. }
  95. return 0;
  96. }
  97. int check_leaf(struct ctree_path *path, int level)
  98. {
  99. int i;
  100. struct leaf *leaf = &path->nodes[level]->leaf;
  101. struct node *parent = NULL;
  102. int parent_slot;
  103. if (path->nodes[level + 1])
  104. parent = &path->nodes[level + 1]->node;
  105. parent_slot = path->slots[level + 1];
  106. if (parent && leaf->header.nritems > 0) {
  107. struct key *parent_key;
  108. parent_key = &parent->keys[parent_slot];
  109. BUG_ON(memcmp(parent_key, &leaf->items[0].key,
  110. sizeof(struct key)));
  111. BUG_ON(parent->blockptrs[parent_slot] != leaf->header.blocknr);
  112. }
  113. for (i = 0; i < leaf->header.nritems - 2; i++) {
  114. BUG_ON(comp_keys(&leaf->items[i].key,
  115. &leaf->items[i+1].key) >= 0);
  116. BUG_ON(leaf->items[i].offset != leaf->items[i + 1].offset +
  117. leaf->items[i + 1].size);
  118. if (i == 0) {
  119. BUG_ON(leaf->items[i].offset + leaf->items[i].size !=
  120. LEAF_DATA_SIZE);
  121. }
  122. }
  123. BUG_ON(leaf_free_space(leaf) < 0);
  124. return 0;
  125. }
  /*
   * Run the consistency checks for the block at 'level' of the path:
   * level 0 is checked as a leaf, every other level as an interior node.
   */
  int check_block(struct ctree_path *path, int level)
  {
      return level == 0 ? check_leaf(path, level) : check_node(path, level);
  }
  /*
   * Binary search for 'key' in an array of 'max' records that starts at
   * 'p'.  Consecutive records are 'item_size' bytes apart and each one
   * begins with a struct key.
   *
   * Returns 0 and stores the matching index in *slot when the key is
   * found.  Otherwise returns 1 and stores the index at which the key
   * would have to be inserted; that index equals 'max' when the key is
   * larger than every key in the array.
   */
  int generic_bin_search(char *p, int item_size, struct key *key,
                         int max, int *slot)
  {
      int lo = 0;
      int hi = max;

      while (lo < hi) {
          int probe = (lo + hi) / 2;
          int cmp = comp_keys((struct key *)(p + probe * item_size), key);

          if (cmp == 0) {
              *slot = probe;
              return 0;
          }
          if (cmp < 0)
              lo = probe + 1;
          else
              hi = probe;
      }
      *slot = lo;
      return 1;
  }
  165. /*
  166. * simple bin_search frontend that does the right thing for
  167. * leaves vs nodes
  168. */
  169. int bin_search(struct node *c, struct key *key, int *slot)
  170. {
  171. if (is_leaf(c->header.flags)) {
  172. struct leaf *l = (struct leaf *)c;
  173. return generic_bin_search((void *)l->items, sizeof(struct item),
  174. key, c->header.nritems, slot);
  175. } else {
  176. return generic_bin_search((void *)c->keys, sizeof(struct key),
  177. key, c->header.nritems, slot);
  178. }
  179. return -1;
  180. }
  181. struct tree_buffer *read_node_slot(struct ctree_root *root,
  182. struct tree_buffer *parent_buf,
  183. int slot)
  184. {
  185. struct node *node = &parent_buf->node;
  186. if (slot < 0)
  187. return NULL;
  188. if (slot >= node->header.nritems)
  189. return NULL;
  190. return read_tree_block(root, node->blockptrs[slot]);
  191. }
  /*
   * balance_level - rebalance the interior node at 'level' of 'path' with
   * its siblings.
   *
   * Nothing is done for level 0.  When the node has no parent in the path
   * and holds exactly one pointer, its only child becomes the new root and
   * the old root block is released and freed.  Otherwise, when the node
   * holds at most NODEPTRS_PER_BLOCK / 4 pointers, pointers are pushed into
   * the left sibling, the right sibling is drained into the node, blocks
   * that end up empty are removed from the parent and freed, and the parent
   * keys are refreshed.
   *
   * Before returning, the path at 'level' is adjusted so that it refers to
   * the block and slot that hold the pointer originally found at
   * path->slots[level]; a mismatch triggers BUG().
   *
   * Returns 0, or an error value recorded from one of the helpers.
   */
  192. static int balance_level(struct ctree_root *root, struct ctree_path *path,
  193. int level)
  194. {
  195. struct tree_buffer *right_buf;
  196. struct tree_buffer *mid_buf;
  197. struct tree_buffer *left_buf;
  198. struct tree_buffer *parent_buf = NULL;
  199. struct node *right = NULL;
  200. struct node *mid;
  201. struct node *left = NULL;
  202. struct node *parent = NULL;
  203. int ret = 0;
  204. int wret;
  205. int pslot;
  206. int orig_slot = path->slots[level];
  207. u64 orig_ptr;
  208. if (level == 0)
  209. return 0;
  210. mid_buf = path->nodes[level];
  211. mid = &mid_buf->node;
  /* remember the pointer the path refers to, for the final sanity check */
  212. orig_ptr = mid->blockptrs[orig_slot];
  /* only look for a parent while level + 1 is still a valid nodes[] index */
  213. if (level < MAX_LEVEL - 1)
  214. parent_buf = path->nodes[level + 1];
  215. pslot = path->slots[level + 1];
  /* no parent in the path: mid is handled as the root */
  216. if (!parent_buf) {
  217. struct tree_buffer *child;
  218. u64 blocknr = mid_buf->blocknr;
  219. if (mid->header.nritems != 1)
  220. return 0;
  221. /* promote the child to a root */
  222. child = read_node_slot(root, mid_buf, 0);
  223. BUG_ON(!child);
  224. root->node = child;
  225. path->nodes[level] = NULL;
  226. /* once for the path */
  227. tree_block_release(root, mid_buf);
  228. /* once for the root ptr */
  229. tree_block_release(root, mid_buf);
  230. return free_extent(root, blocknr, 1);
  231. }
  232. parent = &parent_buf->node;
  /* more than a quarter of the slots in use: leave the node alone */
  233. if (mid->header.nritems > NODEPTRS_PER_BLOCK / 4)
  234. return 0;
  /* read_node_slot() returns NULL when there is no sibling on that side */
  235. left_buf = read_node_slot(root, parent_buf, pslot - 1);
  236. right_buf = read_node_slot(root, parent_buf, pslot + 1);
  237. /* first, try to make some room in the middle buffer */
  238. if (left_buf) {
  239. left = &left_buf->node;
  /* from here on orig_slot counts from the first slot of left */
  240. orig_slot += left->header.nritems;
  241. wret = push_node_left(root, left_buf, mid_buf);
  242. if (wret < 0)
  243. ret = wret;
  244. }
  245. /*
  246. * then try to empty the right most buffer into the middle
  247. */
  248. if (right_buf) {
  249. right = &right_buf->node;
  250. wret = push_node_left(root, mid_buf, right_buf);
  251. if (wret < 0)
  252. ret = wret;
  253. if (right->header.nritems == 0) {
  /* right was emptied: drop it from the parent and free its block */
  254. u64 blocknr = right_buf->blocknr;
  255. tree_block_release(root, right_buf);
  256. right_buf = NULL;
  257. right = NULL;
  258. wret = del_ptr(root, path, level + 1, pslot + 1);
  259. if (wret)
  260. ret = wret;
  261. wret = free_extent(root, blocknr, 1);
  262. if (wret)
  263. ret = wret;
  264. } else {
  /* right still has keys: refresh its key in the parent */
  265. memcpy(parent->keys + pslot + 1, right->keys,
  266. sizeof(struct key));
  267. wret = write_tree_block(root, parent_buf);
  268. if (wret)
  269. ret = wret;
  270. }
  271. }
  272. if (mid->header.nritems == 1) {
  273. /*
  274. * we're not allowed to leave a node with one item in the
  275. * tree during a delete. A deletion from lower in the tree
  276. * could try to delete the only pointer in this node.
  277. * So, pull some keys from the left.
  278. * There has to be a left pointer at this point because
  279. * otherwise we would have pulled some pointers from the
  280. * right
  281. */
  282. BUG_ON(!left_buf);
  283. wret = balance_node_right(root, mid_buf, left_buf);
  284. if (wret < 0)
  285. ret = wret;
  286. BUG_ON(wret == 1);
  287. }
  288. if (mid->header.nritems == 0) {
  289. /* we've managed to empty the middle node, drop it */
  290. u64 blocknr = mid_buf->blocknr;
  291. tree_block_release(root, mid_buf);
  292. mid_buf = NULL;
  293. mid = NULL;
  294. wret = del_ptr(root, path, level + 1, pslot);
  295. if (wret)
  296. ret = wret;
  297. wret = free_extent(root, blocknr, 1);
  298. if (wret)
  299. ret = wret;
  300. } else {
  301. /* update the parent key to reflect our changes */
  302. memcpy(parent->keys + pslot, mid->keys, sizeof(struct key));
  303. wret = write_tree_block(root, parent_buf);
  304. if (wret)
  305. ret = wret;
  306. }
  307. /* update the path */
  308. if (left_buf) {
  309. if (left->header.nritems > orig_slot) {
  /* the original pointer now lives in left: point the path there */
  310. left_buf->count++; // released below
  311. path->nodes[level] = left_buf;
  312. path->slots[level + 1] -= 1;
  313. path->slots[level] = orig_slot;
  314. if (mid_buf)
  315. tree_block_release(root, mid_buf);
  316. } else {
  /* still in mid: convert orig_slot back to an index into mid */
  317. orig_slot -= left->header.nritems;
  318. path->slots[level] = orig_slot;
  319. }
  320. }
  321. /* double check we haven't messed things up */
  322. check_block(path, level);
  323. if (orig_ptr != path->nodes[level]->node.blockptrs[path->slots[level]])
  324. BUG();
  325. if (right_buf)
  326. tree_block_release(root, right_buf);
  327. if (left_buf)
  328. tree_block_release(root, left_buf);
  329. return ret;
  330. }
  331. /*
  332. * look for key in the tree. path is filled in with nodes along the way
  333. * if key is found, we return zero and you can find the item in the leaf
  334. * level of the path (level 0)
  335. *
  336. * If the key isn't found, the path points to the slot where it should
  337. * be inserted, and 1 is returned. If there are other errors during the
  338. * search a negative error number is returned.
  339. *
  340. * if ins_len > 0, nodes and leaves will be split as we walk down the
  341. * tree. if ins_len < 0, nodes will be merged as we walk down the tree (if
  342. * possible)
  343. */
  /*
   * Implementation notes for search_slot():
   *  - the walk starts at root->node, whose count is incremented first
   *  - each block visited is stored in p->nodes[level] and validated with
   *    check_block() before it is searched
   *  - in an interior node, an inexact match with slot > 0 steps back one
   *    slot before descending
   *  - with ins_len > 0 a node holding NODEPTRS_PER_BLOCK pointers is split
   *    on the way down, and a leaf is split when its free space is smaller
   *    than sizeof(struct item) + ins_len
   *  - with ins_len < 0 balance_level() is run on every interior node; when
   *    it leaves p->nodes[level] NULL the walk restarts from root->node
   * NOTE(review): the early error returns do not release the blocks already
   * stored in the path; presumably the caller releases the path - confirm.
   */
  344. int search_slot(struct ctree_root *root, struct key *key,
  345. struct ctree_path *p, int ins_len)
  346. {
  347. struct tree_buffer *b;
  348. struct node *c;
  349. int slot;
  350. int ret;
  351. int level;
  352. again:
  353. b = root->node;
  354. b->count++;
  355. while (b) {
  356. c = &b->node;
  357. level = node_level(c->header.flags);
  358. p->nodes[level] = b;
  359. ret = check_block(p, level);
  360. if (ret)
  361. return -1;
  362. ret = bin_search(c, key, &slot);
  363. if (!is_leaf(c->header.flags)) {
  /* inexact match: step back to the slot before the insertion point */
  364. if (ret && slot > 0)
  365. slot -= 1;
  366. p->slots[level] = slot;
  367. if (ins_len > 0 &&
  368. c->header.nritems == NODEPTRS_PER_BLOCK) {
  369. int sret = split_node(root, p, level);
  370. BUG_ON(sret > 0);
  371. if (sret)
  372. return sret;
  /* the split may have moved the path to a different block and slot */
  373. b = p->nodes[level];
  374. c = &b->node;
  375. slot = p->slots[level];
  376. } else if (ins_len < 0) {
  377. int sret = balance_level(root, p, level);
  378. if (sret)
  379. return sret;
  380. b = p->nodes[level];
  /* balance_level() cleared this level: start over from root->node */
  381. if (!b)
  382. goto again;
  383. c = &b->node;
  384. slot = p->slots[level];
  385. BUG_ON(c->header.nritems == 1);
  386. }
  /* descend into the child selected by slot */
  387. b = read_tree_block(root, c->blockptrs[slot]);
  388. } else {
  389. struct leaf *l = (struct leaf *)c;
  390. p->slots[level] = slot;
  391. if (ins_len > 0 && leaf_free_space(l) <
  392. sizeof(struct item) + ins_len) {
  393. int sret = split_leaf(root, p, ins_len);
  394. BUG_ON(sret > 0);
  395. if (sret)
  396. return sret;
  397. }
  398. BUG_ON(root->node->count == 1);
  /* ret is the bin_search() result for the leaf: 0 found, 1 not found */
  399. return ret;
  400. }
  401. }
  /* reached only when the loop ends because b became NULL */
  402. BUG_ON(root->node->count == 1);
  403. return 1;
  404. }
  405. /*
  406. * adjust the pointers going up the tree, starting at level
  407. * making sure the right key of each node is points to 'key'.
  408. * This is used after shifting pointers to the left, so it stops
  409. * fixing up pointers when a given leaf/node is not in slot 0 of the
  410. * higher levels
  411. *
  412. * If this fails to write a tree block, it returns -1, but continues
  413. * fixing up the blocks in ram so the tree is consistent.
  414. */
  415. static int fixup_low_keys(struct ctree_root *root,
  416. struct ctree_path *path, struct key *key,
  417. int level)
  418. {
  419. int i;
  420. int ret = 0;
  421. int wret;
  422. for (i = level; i < MAX_LEVEL; i++) {
  423. struct node *t;
  424. int tslot = path->slots[i];
  425. if (!path->nodes[i])
  426. break;
  427. t = &path->nodes[i]->node;
  428. memcpy(t->keys + tslot, key, sizeof(*key));
  429. wret = write_tree_block(root, path->nodes[i]);
  430. if (wret)
  431. ret = wret;
  432. if (tslot != 0)
  433. break;
  434. }
  435. return ret;
  436. }
  437. /*
  438. * try to push data from one node into the next node left in the
  439. * tree.
  440. *
  441. * returns 0 if some ptrs were pushed left, < 0 if there was some horrible
  442. * error, and > 0 if there was no room in the left hand block.
  443. */
  444. static int push_node_left(struct ctree_root *root, struct tree_buffer *dst_buf,
  445. struct tree_buffer *src_buf)
  446. {
  447. struct node *src = &src_buf->node;
  448. struct node *dst = &dst_buf->node;
  449. int push_items = 0;
  450. int src_nritems;
  451. int dst_nritems;
  452. int ret = 0;
  453. int wret;
  454. src_nritems = src->header.nritems;
  455. dst_nritems = dst->header.nritems;
  456. push_items = NODEPTRS_PER_BLOCK - dst_nritems;
  457. if (push_items <= 0) {
  458. return 1;
  459. }
  460. if (src_nritems < push_items)
  461. push_items = src_nritems;
  462. memcpy(dst->keys + dst_nritems, src->keys,
  463. push_items * sizeof(struct key));
  464. memcpy(dst->blockptrs + dst_nritems, src->blockptrs,
  465. push_items * sizeof(u64));
  466. if (push_items < src_nritems) {
  467. memmove(src->keys, src->keys + push_items,
  468. (src_nritems - push_items) * sizeof(struct key));
  469. memmove(src->blockptrs, src->blockptrs + push_items,
  470. (src_nritems - push_items) * sizeof(u64));
  471. }
  472. src->header.nritems -= push_items;
  473. dst->header.nritems += push_items;
  474. wret = write_tree_block(root, src_buf);
  475. if (wret < 0)
  476. ret = wret;
  477. wret = write_tree_block(root, dst_buf);
  478. if (wret < 0)
  479. ret = wret;
  480. return ret;
  481. }
  482. /*
  483. * try to push data from one node into the next node right in the
  484. * tree.
  485. *
  486. * returns 0 if some ptrs were pushed, < 0 if there was some horrible
  487. * error, and > 0 if there was no room in the right hand block.
  488. *
  489. * this will only push up to 1/2 the contents of the left node over
  490. */
  491. static int balance_node_right(struct ctree_root *root,
  492. struct tree_buffer *dst_buf,
  493. struct tree_buffer *src_buf)
  494. {
  495. struct node *src = &src_buf->node;
  496. struct node *dst = &dst_buf->node;
  497. int push_items = 0;
  498. int max_push;
  499. int src_nritems;
  500. int dst_nritems;
  501. int ret = 0;
  502. int wret;
  503. src_nritems = src->header.nritems;
  504. dst_nritems = dst->header.nritems;
  505. push_items = NODEPTRS_PER_BLOCK - dst_nritems;
  506. if (push_items <= 0) {
  507. return 1;
  508. }
  509. max_push = src_nritems / 2 + 1;
  510. /* don't try to empty the node */
  511. if (max_push > src_nritems)
  512. return 1;
  513. if (max_push < push_items)
  514. push_items = max_push;
  515. memmove(dst->keys + push_items, dst->keys,
  516. dst_nritems * sizeof(struct key));
  517. memmove(dst->blockptrs + push_items, dst->blockptrs,
  518. dst_nritems * sizeof(u64));
  519. memcpy(dst->keys, src->keys + src_nritems - push_items,
  520. push_items * sizeof(struct key));
  521. memcpy(dst->blockptrs, src->blockptrs + src_nritems - push_items,
  522. push_items * sizeof(u64));
  523. src->header.nritems -= push_items;
  524. dst->header.nritems += push_items;
  525. wret = write_tree_block(root, src_buf);
  526. if (wret < 0)
  527. ret = wret;
  528. wret = write_tree_block(root, dst_buf);
  529. if (wret < 0)
  530. ret = wret;
  531. return ret;
  532. }
  533. /*
  534. * helper function to insert a new root level in the tree.
  535. * A new node is allocated, and a single item is inserted to
  536. * point to the existing root
  537. *
  538. * returns zero on success or < 0 on failure.
  539. */
  540. static int insert_new_root(struct ctree_root *root,
  541. struct ctree_path *path, int level)
  542. {
  543. struct tree_buffer *t;
  544. struct node *lower;
  545. struct node *c;
  546. struct key *lower_key;
  547. BUG_ON(path->nodes[level]);
  548. BUG_ON(path->nodes[level-1] != root->node);
  549. t = alloc_free_block(root);
  550. c = &t->node;
  551. memset(c, 0, sizeof(c));
  552. c->header.nritems = 1;
  553. c->header.flags = node_level(level);
  554. c->header.blocknr = t->blocknr;
  555. c->header.parentid = root->node->node.header.parentid;
  556. lower = &path->nodes[level-1]->node;
  557. if (is_leaf(lower->header.flags))
  558. lower_key = &((struct leaf *)lower)->items[0].key;
  559. else
  560. lower_key = lower->keys;
  561. memcpy(c->keys, lower_key, sizeof(struct key));
  562. c->blockptrs[0] = path->nodes[level-1]->blocknr;
  563. /* the super has an extra ref to root->node */
  564. tree_block_release(root, root->node);
  565. root->node = t;
  566. t->count++;
  567. write_tree_block(root, t);
  568. path->nodes[level] = t;
  569. path->slots[level] = 0;
  570. return 0;
  571. }
  572. /*
  573. * worker function to insert a single pointer in a node.
  574. * the node should have enough room for the pointer already
  575. *
  576. * slot and level indicate where you want the key to go, and
  577. * blocknr is the block the key points to.
  578. *
  579. * returns zero on success and < 0 on any error
  580. */
  581. static int insert_ptr(struct ctree_root *root,
  582. struct ctree_path *path, struct key *key,
  583. u64 blocknr, int slot, int level)
  584. {
  585. struct node *lower;
  586. int nritems;
  587. BUG_ON(!path->nodes[level]);
  588. lower = &path->nodes[level]->node;
  589. nritems = lower->header.nritems;
  590. if (slot > nritems)
  591. BUG();
  592. if (nritems == NODEPTRS_PER_BLOCK)
  593. BUG();
  594. if (slot != nritems) {
  595. memmove(lower->keys + slot + 1, lower->keys + slot,
  596. (nritems - slot) * sizeof(struct key));
  597. memmove(lower->blockptrs + slot + 1, lower->blockptrs + slot,
  598. (nritems - slot) * sizeof(u64));
  599. }
  600. memcpy(lower->keys + slot, key, sizeof(struct key));
  601. lower->blockptrs[slot] = blocknr;
  602. lower->header.nritems++;
  603. if (lower->keys[1].objectid == 0)
  604. BUG();
  605. write_tree_block(root, path->nodes[level]);
  606. return 0;
  607. }
  608. /*
  609. * split the node at the specified level in path in two.
  610. * The path is corrected to point to the appropriate node after the split
  611. *
  612. * Before splitting this tries to make some room in the node by pushing
  613. * left and right, if either one works, it returns right away.
  614. *
  615. * returns 0 on success and < 0 on failure
  616. */
  617. static int split_node(struct ctree_root *root, struct ctree_path *path,
  618. int level)
  619. {
  620. struct tree_buffer *t;
  621. struct node *c;
  622. struct tree_buffer *split_buffer;
  623. struct node *split;
  624. int mid;
  625. int ret;
  626. int wret;
  627. t = path->nodes[level];
  628. c = &t->node;
  629. if (t == root->node) {
  630. /* trying to split the root, lets make a new one */
  631. ret = insert_new_root(root, path, level + 1);
  632. if (ret)
  633. return ret;
  634. }
  635. split_buffer = alloc_free_block(root);
  636. split = &split_buffer->node;
  637. split->header.flags = c->header.flags;
  638. split->header.blocknr = split_buffer->blocknr;
  639. split->header.parentid = root->node->node.header.parentid;
  640. mid = (c->header.nritems + 1) / 2;
  641. memcpy(split->keys, c->keys + mid,
  642. (c->header.nritems - mid) * sizeof(struct key));
  643. memcpy(split->blockptrs, c->blockptrs + mid,
  644. (c->header.nritems - mid) * sizeof(u64));
  645. split->header.nritems = c->header.nritems - mid;
  646. c->header.nritems = mid;
  647. ret = 0;
  648. wret = write_tree_block(root, t);
  649. if (wret)
  650. ret = wret;
  651. wret = write_tree_block(root, split_buffer);
  652. if (wret)
  653. ret = wret;
  654. wret = insert_ptr(root, path, split->keys, split_buffer->blocknr,
  655. path->slots[level + 1] + 1, level + 1);
  656. if (wret)
  657. ret = wret;
  658. if (path->slots[level] >= mid) {
  659. path->slots[level] -= mid;
  660. tree_block_release(root, t);
  661. path->nodes[level] = split_buffer;
  662. path->slots[level + 1] += 1;
  663. } else {
  664. tree_block_release(root, split_buffer);
  665. }
  666. return ret;
  667. }
  668. /*
  669. * how many bytes are required to store the items in a leaf. start
  670. * and nr indicate which items in the leaf to check. This totals up the
  671. * space used both by the item structs and the item data
  672. */
  673. static int leaf_space_used(struct leaf *l, int start, int nr)
  674. {
  675. int data_len;
  676. int end = start + nr - 1;
  677. if (!nr)
  678. return 0;
  679. data_len = l->items[start].offset + l->items[start].size;
  680. data_len = data_len - l->items[end].offset;
  681. data_len += sizeof(struct item) * nr;
  682. return data_len;
  683. }
  684. /*
  685. * push some data in the path leaf to the right, trying to free up at
  686. * least data_size bytes. returns zero if the push worked, nonzero otherwise
  687. *
  688. * returns 1 if the push failed because the other node didn't have enough
  689. * room, 0 if everything worked out and < 0 if there were major errors.
  690. */
  /*
   * Implementation notes for push_leaf_right():
   *  - the right neighbour is the block at slot path->slots[1] + 1 of
   *    path->nodes[1]; 1 is returned when there is no parent in the path
   *    or the leaf is already in the parent's last slot
   *  - items are considered from the end of the left leaf towards the
   *    front; when the loop reaches the path's slot, room for the new item
   *    (data_size plus one item header) is reserved as well
   *  - after the move every offset in the right leaf is recomputed from
   *    LEAF_DATA_SIZE, the parent's key for the right leaf is refreshed,
   *    and the path is switched to the right leaf when its slot moved there
   * NOTE(review): the results of read_tree_block() and write_tree_block()
   * are not checked in this function.
   */
  691. static int push_leaf_right(struct ctree_root *root, struct ctree_path *path,
  692. int data_size)
  693. {
  694. struct tree_buffer *left_buf = path->nodes[0];
  695. struct leaf *left = &left_buf->leaf;
  696. struct leaf *right;
  697. struct tree_buffer *right_buf;
  698. struct tree_buffer *upper;
  699. int slot;
  700. int i;
  701. int free_space;
  702. int push_space = 0;
  703. int push_items = 0;
  704. struct item *item;
  705. slot = path->slots[1];
  706. if (!path->nodes[1]) {
  707. return 1;
  708. }
  709. upper = path->nodes[1];
  /* no right neighbour under this parent */
  710. if (slot >= upper->node.header.nritems - 1) {
  711. return 1;
  712. }
  713. right_buf = read_tree_block(root, upper->node.blockptrs[slot + 1]);
  714. right = &right_buf->leaf;
  715. free_space = leaf_free_space(right);
  716. if (free_space < data_size + sizeof(struct item)) {
  717. tree_block_release(root, right_buf);
  718. return 1;
  719. }
  /* count how many items from the end of left fit into right */
  720. for (i = left->header.nritems - 1; i >= 0; i--) {
  721. item = left->items + i;
  722. if (path->slots[0] == i)
  723. push_space += data_size + sizeof(*item);
  724. if (item->size + sizeof(*item) + push_space > free_space)
  725. break;
  726. push_items++;
  727. push_space += item->size + sizeof(*item);
  728. }
  729. if (push_items == 0) {
  730. tree_block_release(root, right_buf);
  731. return 1;
  732. }
  733. /* push left to right */
  /* push_space becomes the number of data bytes owned by the moved items */
  734. push_space = left->items[left->header.nritems - push_items].offset +
  735. left->items[left->header.nritems - push_items].size;
  736. push_space -= leaf_data_end(left);
  737. /* make room in the right data area */
  738. memmove(right->data + leaf_data_end(right) - push_space,
  739. right->data + leaf_data_end(right),
  740. LEAF_DATA_SIZE - leaf_data_end(right));
  741. /* copy from the left data area */
  742. memcpy(right->data + LEAF_DATA_SIZE - push_space,
  743. left->data + leaf_data_end(left),
  744. push_space);
  /* shift right's item headers up to make room at the front */
  745. memmove(right->items + push_items, right->items,
  746. right->header.nritems * sizeof(struct item));
  747. /* copy the items from left to right */
  748. memcpy(right->items, left->items + left->header.nritems - push_items,
  749. push_items * sizeof(struct item));
  750. /* update the item pointers */
  751. right->header.nritems += push_items;
  /* push_space is reused as a running offset, starting at the end of the data area */
  752. push_space = LEAF_DATA_SIZE;
  753. for (i = 0; i < right->header.nritems; i++) {
  754. right->items[i].offset = push_space - right->items[i].size;
  755. push_space = right->items[i].offset;
  756. }
  757. left->header.nritems -= push_items;
  758. write_tree_block(root, left_buf);
  759. write_tree_block(root, right_buf);
  /* the first key of right changed, so update it in the parent */
  760. memcpy(upper->node.keys + slot + 1,
  761. &right->items[0].key, sizeof(struct key));
  762. write_tree_block(root, upper);
  763. /* then fixup the leaf pointer in the path */
  764. if (path->slots[0] >= left->header.nritems) {
  765. path->slots[0] -= left->header.nritems;
  766. tree_block_release(root, path->nodes[0]);
  767. path->nodes[0] = right_buf;
  768. path->slots[1] += 1;
  769. } else {
  770. tree_block_release(root, right_buf);
  771. }
  772. return 0;
  773. }
  774. /*
  775. * push some data in the path leaf to the left, trying to free up at
  776. * least data_size bytes. returns zero if the push worked, nonzero otherwise
  777. */
  778. static int push_leaf_left(struct ctree_root *root, struct ctree_path *path,
  779. int data_size)
  780. {
  781. struct tree_buffer *right_buf = path->nodes[0];
  782. struct leaf *right = &right_buf->leaf;
  783. struct tree_buffer *t;
  784. struct leaf *left;
  785. int slot;
  786. int i;
  787. int free_space;
  788. int push_space = 0;
  789. int push_items = 0;
  790. struct item *item;
  791. int old_left_nritems;
  792. int ret = 0;
  793. int wret;
  794. slot = path->slots[1];
  795. if (slot == 0) {
  796. return 1;
  797. }
  798. if (!path->nodes[1]) {
  799. return 1;
  800. }
  801. t = read_tree_block(root, path->nodes[1]->node.blockptrs[slot - 1]);
  802. left = &t->leaf;
  803. free_space = leaf_free_space(left);
  804. if (free_space < data_size + sizeof(struct item)) {
  805. tree_block_release(root, t);
  806. return 1;
  807. }
  808. for (i = 0; i < right->header.nritems; i++) {
  809. item = right->items + i;
  810. if (path->slots[0] == i)
  811. push_space += data_size + sizeof(*item);
  812. if (item->size + sizeof(*item) + push_space > free_space)
  813. break;
  814. push_items++;
  815. push_space += item->size + sizeof(*item);
  816. }
  817. if (push_items == 0) {
  818. tree_block_release(root, t);
  819. return 1;
  820. }
  821. /* push data from right to left */
  822. memcpy(left->items + left->header.nritems,
  823. right->items, push_items * sizeof(struct item));
  824. push_space = LEAF_DATA_SIZE - right->items[push_items -1].offset;
  825. memcpy(left->data + leaf_data_end(left) - push_space,
  826. right->data + right->items[push_items - 1].offset,
  827. push_space);
  828. old_left_nritems = left->header.nritems;
  829. BUG_ON(old_left_nritems < 0);
  830. for(i = old_left_nritems; i < old_left_nritems + push_items; i++) {
  831. left->items[i].offset -= LEAF_DATA_SIZE -
  832. left->items[old_left_nritems -1].offset;
  833. }
  834. left->header.nritems += push_items;
  835. /* fixup right node */
  836. push_space = right->items[push_items-1].offset - leaf_data_end(right);
  837. memmove(right->data + LEAF_DATA_SIZE - push_space, right->data +
  838. leaf_data_end(right), push_space);
  839. memmove(right->items, right->items + push_items,
  840. (right->header.nritems - push_items) * sizeof(struct item));
  841. right->header.nritems -= push_items;
  842. push_space = LEAF_DATA_SIZE;
  843. for (i = 0; i < right->header.nritems; i++) {
  844. right->items[i].offset = push_space - right->items[i].size;
  845. push_space = right->items[i].offset;
  846. }
  847. wret = write_tree_block(root, t);
  848. if (wret)
  849. ret = wret;
  850. wret = write_tree_block(root, right_buf);
  851. if (wret)
  852. ret = wret;
  853. wret = fixup_low_keys(root, path, &right->items[0].key, 1);
  854. if (wret)
  855. ret = wret;
  856. /* then fixup the leaf pointer in the path */
  857. if (path->slots[0] < push_items) {
  858. path->slots[0] += old_left_nritems;
  859. tree_block_release(root, path->nodes[0]);
  860. path->nodes[0] = t;
  861. path->slots[1] -= 1;
  862. } else {
  863. tree_block_release(root, t);
  864. path->slots[0] -= push_items;
  865. }
  866. BUG_ON(path->slots[0] < 0);
  867. return ret;
  868. }
  869. /*
  870. * split the path's leaf in two, making sure there is at least data_size
  871. * available for the resulting leaf level of the path.
  872. *
  873. * returns 0 if all went well and < 0 on failure.
  874. */
  875. static int split_leaf(struct ctree_root *root, struct ctree_path *path,
  876. int data_size)
  877. {
  878. struct tree_buffer *l_buf;
  879. struct leaf *l;
  880. int nritems;
  881. int mid;
  882. int slot;
  883. struct leaf *right;
  884. struct tree_buffer *right_buffer;
  885. int space_needed = data_size + sizeof(struct item);
  886. int data_copy_size;
  887. int rt_data_off;
  888. int i;
  889. int ret;
  890. int wret;
  891. wret = push_leaf_left(root, path, data_size);
  892. if (wret < 0)
  893. return wret;
  894. if (wret) {
  895. wret = push_leaf_right(root, path, data_size);
  896. if (wret < 0)
  897. return wret;
  898. }
  899. l_buf = path->nodes[0];
  900. l = &l_buf->leaf;
  901. /* did the pushes work? */
  902. if (leaf_free_space(l) >= sizeof(struct item) + data_size)
  903. return 0;
  904. if (!path->nodes[1]) {
  905. ret = insert_new_root(root, path, 1);
  906. if (ret)
  907. return ret;
  908. }
  909. slot = path->slots[0];
  910. nritems = l->header.nritems;
  911. mid = (nritems + 1)/ 2;
  912. right_buffer = alloc_free_block(root);
  913. BUG_ON(!right_buffer);
  914. BUG_ON(mid == nritems);
  915. right = &right_buffer->leaf;
  916. memset(right, 0, sizeof(*right));
  917. if (mid <= slot) {
  918. /* FIXME, just alloc a new leaf here */
  919. if (leaf_space_used(l, mid, nritems - mid) + space_needed >
  920. LEAF_DATA_SIZE)
  921. BUG();
  922. } else {
  923. /* FIXME, just alloc a new leaf here */
  924. if (leaf_space_used(l, 0, mid + 1) + space_needed >
  925. LEAF_DATA_SIZE)
  926. BUG();
  927. }
  928. right->header.nritems = nritems - mid;
  929. right->header.blocknr = right_buffer->blocknr;
  930. right->header.flags = node_level(0);
  931. right->header.parentid = root->node->node.header.parentid;
  932. data_copy_size = l->items[mid].offset + l->items[mid].size -
  933. leaf_data_end(l);
  934. memcpy(right->items, l->items + mid,
  935. (nritems - mid) * sizeof(struct item));
  936. memcpy(right->data + LEAF_DATA_SIZE - data_copy_size,
  937. l->data + leaf_data_end(l), data_copy_size);
  938. rt_data_off = LEAF_DATA_SIZE -
  939. (l->items[mid].offset + l->items[mid].size);
  940. for (i = 0; i < right->header.nritems; i++)
  941. right->items[i].offset += rt_data_off;
  942. l->header.nritems = mid;
  943. ret = 0;
  944. wret = insert_ptr(root, path, &right->items[0].key,
  945. right_buffer->blocknr, path->slots[1] + 1, 1);
  946. if (wret)
  947. ret = wret;
  948. wret = write_tree_block(root, right_buffer);
  949. if (wret)
  950. ret = wret;
  951. wret = write_tree_block(root, l_buf);
  952. if (wret)
  953. ret = wret;
  954. BUG_ON(path->slots[0] != slot);
  955. if (mid <= slot) {
  956. tree_block_release(root, path->nodes[0]);
  957. path->nodes[0] = right_buffer;
  958. path->slots[0] -= mid;
  959. path->slots[1] += 1;
  960. } else
  961. tree_block_release(root, right_buffer);
  962. BUG_ON(path->slots[0] < 0);
  963. return ret;
  964. }
  965. /*
  966. * Given a key and some data, insert an item into the tree.
  967. * This does all the path init required, making room in the tree if needed.
  968. */
  969. int insert_item(struct ctree_root *root, struct key *key,
  970. void *data, int data_size)
  971. {
  972. int ret = 0;
  973. int wret;
  974. int slot;
  975. int slot_orig;
  976. struct leaf *leaf;
  977. struct tree_buffer *leaf_buf;
  978. unsigned int nritems;
  979. unsigned int data_end;
  980. struct ctree_path path;
  981. /* create a root if there isn't one */
  982. if (!root->node)
  983. BUG();
  984. init_path(&path);
  985. ret = search_slot(root, key, &path, data_size);
  986. if (ret == 0) {
  987. release_path(root, &path);
  988. return -EEXIST;
  989. }
  990. if (ret < 0) {
  991. release_path(root, &path);
  992. return ret;
  993. }
  994. slot_orig = path.slots[0];
  995. leaf_buf = path.nodes[0];
  996. leaf = &leaf_buf->leaf;
  997. nritems = leaf->header.nritems;
  998. data_end = leaf_data_end(leaf);
  999. if (leaf_free_space(leaf) < sizeof(struct item) + data_size)
  1000. BUG();
  1001. slot = path.slots[0];
  1002. BUG_ON(slot < 0);
  1003. if (slot != nritems) {
  1004. int i;
  1005. unsigned int old_data = leaf->items[slot].offset +
  1006. leaf->items[slot].size;
  1007. /*
  1008. * item0..itemN ... dataN.offset..dataN.size .. data0.size
  1009. */
  1010. /* first correct the data pointers */
  1011. for (i = slot; i < nritems; i++)
  1012. leaf->items[i].offset -= data_size;
  1013. /* shift the items */
  1014. memmove(leaf->items + slot + 1, leaf->items + slot,
  1015. (nritems - slot) * sizeof(struct item));
  1016. /* shift the data */
  1017. memmove(leaf->data + data_end - data_size, leaf->data +
  1018. data_end, old_data - data_end);
  1019. data_end = old_data;
  1020. }
  1021. /* copy the new data in */
  1022. memcpy(&leaf->items[slot].key, key, sizeof(struct key));
  1023. leaf->items[slot].offset = data_end - data_size;
  1024. leaf->items[slot].size = data_size;
  1025. memcpy(leaf->data + data_end - data_size, data, data_size);
  1026. leaf->header.nritems += 1;
  1027. ret = 0;
  1028. if (slot == 0)
  1029. ret = fixup_low_keys(root, &path, key, 1);
  1030. wret = write_tree_block(root, leaf_buf);
  1031. if (wret)
  1032. ret = wret;
  1033. if (leaf_free_space(leaf) < 0)
  1034. BUG();
  1035. check_leaf(&path, 0);
  1036. release_path(root, &path);
  1037. return ret;
  1038. }
  1039. /*
  1040. * delete the pointer from a given node.
  1041. *
  1042. * If the delete empties a node, the node is removed from the tree,
  1043. * continuing all the way the root if required. The root is converted into
  1044. * a leaf if all the nodes are emptied.
  1045. */
  1046. static int del_ptr(struct ctree_root *root, struct ctree_path *path, int level,
  1047. int slot)
  1048. {
  1049. struct node *node;
  1050. struct tree_buffer *parent = path->nodes[level];
  1051. int nritems;
  1052. int ret = 0;
  1053. int wret;
  1054. node = &parent->node;
  1055. nritems = node->header.nritems;
  1056. if (slot != nritems -1) {
  1057. memmove(node->keys + slot, node->keys + slot + 1,
  1058. sizeof(struct key) * (nritems - slot - 1));
  1059. memmove(node->blockptrs + slot,
  1060. node->blockptrs + slot + 1,
  1061. sizeof(u64) * (nritems - slot - 1));
  1062. }
  1063. node->header.nritems--;
  1064. if (node->header.nritems == 0 && parent == root->node) {
  1065. BUG_ON(node_level(root->node->node.header.flags) != 1);
  1066. /* just turn the root into a leaf and break */
  1067. root->node->node.header.flags = node_level(0);
  1068. } else if (slot == 0) {
  1069. wret = fixup_low_keys(root, path, node->keys, level + 1);
  1070. if (wret)
  1071. ret = wret;
  1072. }
  1073. wret = write_tree_block(root, parent);
  1074. if (wret)
  1075. ret = wret;
  1076. return ret;
  1077. }
  1078. /*
  1079. * delete the item at the leaf level in path. If that empties
  1080. * the leaf, remove it from the tree
  1081. */
  1082. int del_item(struct ctree_root *root, struct ctree_path *path)
  1083. {
  1084. int slot;
  1085. struct leaf *leaf;
  1086. struct tree_buffer *leaf_buf;
  1087. int doff;
  1088. int dsize;
  1089. int ret = 0;
  1090. int wret;
  1091. leaf_buf = path->nodes[0];
  1092. leaf = &leaf_buf->leaf;
  1093. slot = path->slots[0];
  1094. doff = leaf->items[slot].offset;
  1095. dsize = leaf->items[slot].size;
  1096. if (slot != leaf->header.nritems - 1) {
  1097. int i;
  1098. int data_end = leaf_data_end(leaf);
  1099. memmove(leaf->data + data_end + dsize,
  1100. leaf->data + data_end,
  1101. doff - data_end);
  1102. for (i = slot + 1; i < leaf->header.nritems; i++)
  1103. leaf->items[i].offset += dsize;
  1104. memmove(leaf->items + slot, leaf->items + slot + 1,
  1105. sizeof(struct item) *
  1106. (leaf->header.nritems - slot - 1));
  1107. }
  1108. leaf->header.nritems -= 1;
  1109. /* delete the leaf if we've emptied it */
  1110. if (leaf->header.nritems == 0) {
  1111. if (leaf_buf == root->node) {
  1112. leaf->header.flags = node_level(0);
  1113. write_tree_block(root, leaf_buf);
  1114. } else {
  1115. wret = del_ptr(root, path, 1, path->slots[1]);
  1116. if (wret)
  1117. ret = wret;
  1118. wret = free_extent(root, leaf_buf->blocknr, 1);
  1119. if (wret)
  1120. ret = wret;
  1121. }
  1122. } else {
  1123. int used = leaf_space_used(leaf, 0, leaf->header.nritems);
  1124. if (slot == 0) {
  1125. wret = fixup_low_keys(root, path,
  1126. &leaf->items[0].key, 1);
  1127. if (wret)
  1128. ret = wret;
  1129. }
  1130. wret = write_tree_block(root, leaf_buf);
  1131. if (wret)
  1132. ret = wret;
  1133. /* delete the leaf if it is mostly empty */
  1134. if (used < LEAF_DATA_SIZE / 3) {
  1135. /* push_leaf_left fixes the path.
  1136. * make sure the path still points to our leaf
  1137. * for possible call to del_ptr below
  1138. */
  1139. slot = path->slots[1];
  1140. leaf_buf->count++;
  1141. wret = push_leaf_left(root, path, 1);
  1142. if (wret < 0)
  1143. ret = wret;
  1144. if (leaf->header.nritems) {
  1145. wret = push_leaf_right(root, path, 1);
  1146. if (wret < 0)
  1147. ret = wret;
  1148. }
  1149. if (leaf->header.nritems == 0) {
  1150. u64 blocknr = leaf_buf->blocknr;
  1151. wret = del_ptr(root, path, 1, slot);
  1152. if (wret)
  1153. ret = wret;
  1154. tree_block_release(root, leaf_buf);
  1155. wret = free_extent(root, blocknr, 1);
  1156. if (wret)
  1157. ret = wret;
  1158. } else {
  1159. tree_block_release(root, leaf_buf);
  1160. }
  1161. }
  1162. }
  1163. return ret;
  1164. }
  1165. /*
  1166. * walk up the tree as far as required to find the next leaf.
  1167. * returns 0 if it found something or 1 if there are no greater leaves.
  1168. * returns < 0 on io errors.
  1169. */
  1170. int next_leaf(struct ctree_root *root, struct ctree_path *path)
  1171. {
  1172. int slot;
  1173. int level = 1;
  1174. u64 blocknr;
  1175. struct tree_buffer *c;
  1176. struct tree_buffer *next = NULL;
  1177. while(level < MAX_LEVEL) {
  1178. if (!path->nodes[level])
  1179. return 1;
  1180. slot = path->slots[level] + 1;
  1181. c = path->nodes[level];
  1182. if (slot >= c->node.header.nritems) {
  1183. level++;
  1184. continue;
  1185. }
  1186. blocknr = c->node.blockptrs[slot];
  1187. if (next)
  1188. tree_block_release(root, next);
  1189. next = read_tree_block(root, blocknr);
  1190. break;
  1191. }
  1192. path->slots[level] = slot;
  1193. while(1) {
  1194. level--;
  1195. c = path->nodes[level];
  1196. tree_block_release(root, c);
  1197. path->nodes[level] = next;
  1198. path->slots[level] = 0;
  1199. if (!level)
  1200. break;
  1201. next = read_tree_block(root, next->node.blockptrs[0]);
  1202. }
  1203. return 0;
  1204. }