ctree.c 60 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146114711481149115011511152115311541155115611571158115911601161116211631164116511661167116811691170117111721173117411751176117711781179118011811182118311841185118611871188118911901191119211931194119511961197119811991200120112021203120412051206120712081209121012111212121312141215121612171218121912201221122212231224122512261227122812291230123112321233123412351236123712381239124012411242124312441245124612471248124912501251125212531254125512561257125812591260126112621263126412651266126712681269127012711272127312741275127612771278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177717781779178017811782178317841785178617871788178917901791179217931794179517961797179817991800180118021803180418051806180718081809181018111812181318141815181618171818181918201821182218231824182518261827182818291830183118321833183418351836183718381839184018411842184318441845184618471848184918501851185218531854185518561857185818591860186118621863186418651866186718681869187018711872187318741875187618771878187918801881188218831884188518861887188818891890189118921893189418951896189718981899190019011902190319041905190619071908190919101911191219131914191519161917191819191920192119221923192419251926192719281929193019311932193319341935193619371938193919401941194219431944194519461947194819491950195119521953195419551956195719581959196019611962196319641965196619671968196919701971197219731974197519761977197819791980198119821983198419851986198719881989199019911992199319941995199619971998199920002001200220032004200520062007200820092010201120122013201420152016201720182019202020212022202320242025202620272028202920302031203220332034203520362037203820392040204120422043204420452046204720482049205020512052205320542055205620572058205920602061206220632064206520662067206820692070207120722073207420752076207720782079208020812082208320842085208620872088208920902091209220932094209520962097209820992100210121022103210421052106210721082109211021112112211321142115211621172118211921202121212221232124212521262127212821292130213121322133213421352136213721382139214021412142214321442145214621472148214921502151215221532154215521562157215821592160216121622163216421652166
  1. /*
  2. * Copyright (C) 2007 Oracle. All rights reserved.
  3. *
  4. * This program is free software; you can redistribute it and/or
  5. * modify it under the terms of the GNU General Public
  6. * License v2 as published by the Free Software Foundation.
  7. *
  8. * This program is distributed in the hope that it will be useful,
  9. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  11. * General Public License for more details.
  12. *
  13. * You should have received a copy of the GNU General Public
  14. * License along with this program; if not, write to the
  15. * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
  16. * Boston, MA 021110-1307, USA.
  17. */
  18. #include "ctree.h"
  19. #include "disk-io.h"
  20. #include "transaction.h"
  21. static int split_node(struct btrfs_trans_handle *trans, struct btrfs_root
  22. *root, struct btrfs_path *path, int level);
  23. static int split_leaf(struct btrfs_trans_handle *trans, struct btrfs_root
  24. *root, struct btrfs_key *ins_key,
  25. struct btrfs_path *path, int data_size);
  26. static int push_node_left(struct btrfs_trans_handle *trans, struct btrfs_root
  27. *root, struct buffer_head *dst, struct buffer_head
  28. *src);
  29. static int balance_node_right(struct btrfs_trans_handle *trans, struct
  30. btrfs_root *root, struct buffer_head *dst_buf,
  31. struct buffer_head *src_buf);
  32. static int del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root,
  33. struct btrfs_path *path, int level, int slot);
  34. inline void btrfs_init_path(struct btrfs_path *p)
  35. {
  36. memset(p, 0, sizeof(*p));
  37. }
  38. struct btrfs_path *btrfs_alloc_path(void)
  39. {
  40. struct btrfs_path *path;
  41. path = kmem_cache_alloc(btrfs_path_cachep, GFP_NOFS);
  42. if (path) {
  43. btrfs_init_path(path);
  44. path->reada = 1;
  45. }
  46. return path;
  47. }
  48. void btrfs_free_path(struct btrfs_path *p)
  49. {
  50. btrfs_release_path(NULL, p);
  51. kmem_cache_free(btrfs_path_cachep, p);
  52. }
  53. void btrfs_release_path(struct btrfs_root *root, struct btrfs_path *p)
  54. {
  55. int i;
  56. for (i = 0; i < BTRFS_MAX_LEVEL; i++) {
  57. if (!p->nodes[i])
  58. break;
  59. btrfs_block_release(root, p->nodes[i]);
  60. }
  61. memset(p, 0, sizeof(*p));
  62. }
  63. static int __btrfs_cow_block(struct btrfs_trans_handle *trans, struct btrfs_root
  64. *root, struct buffer_head *buf, struct buffer_head
  65. *parent, int parent_slot, struct buffer_head
  66. **cow_ret, u64 search_start, u64 empty_size)
  67. {
  68. struct buffer_head *cow;
  69. struct btrfs_node *cow_node;
  70. int ret = 0;
  71. int different_trans = 0;
  72. WARN_ON(root->ref_cows && trans->transid != root->last_trans);
  73. WARN_ON(!buffer_uptodate(buf));
  74. cow = btrfs_alloc_free_block(trans, root, search_start, empty_size);
  75. if (IS_ERR(cow))
  76. return PTR_ERR(cow);
  77. cow_node = btrfs_buffer_node(cow);
  78. if (buf->b_size != root->blocksize || cow->b_size != root->blocksize)
  79. WARN_ON(1);
  80. memcpy(cow_node, btrfs_buffer_node(buf), root->blocksize);
  81. btrfs_set_header_blocknr(&cow_node->header, bh_blocknr(cow));
  82. btrfs_set_header_generation(&cow_node->header, trans->transid);
  83. btrfs_set_header_owner(&cow_node->header, root->root_key.objectid);
  84. WARN_ON(btrfs_header_generation(btrfs_buffer_header(buf)) >
  85. trans->transid);
  86. if (btrfs_header_generation(btrfs_buffer_header(buf)) !=
  87. trans->transid) {
  88. different_trans = 1;
  89. ret = btrfs_inc_ref(trans, root, buf);
  90. if (ret)
  91. return ret;
  92. } else {
  93. clean_tree_block(trans, root, buf);
  94. }
  95. if (buf == root->node) {
  96. root->node = cow;
  97. get_bh(cow);
  98. if (buf != root->commit_root) {
  99. btrfs_free_extent(trans, root, bh_blocknr(buf), 1, 1);
  100. }
  101. btrfs_block_release(root, buf);
  102. } else {
  103. btrfs_set_node_blockptr(btrfs_buffer_node(parent), parent_slot,
  104. bh_blocknr(cow));
  105. btrfs_mark_buffer_dirty(parent);
  106. WARN_ON(btrfs_header_generation(btrfs_buffer_header(parent)) !=
  107. trans->transid);
  108. btrfs_free_extent(trans, root, bh_blocknr(buf), 1, 1);
  109. }
  110. btrfs_block_release(root, buf);
  111. btrfs_mark_buffer_dirty(cow);
  112. *cow_ret = cow;
  113. return 0;
  114. }
  115. int btrfs_cow_block(struct btrfs_trans_handle *trans, struct btrfs_root
  116. *root, struct buffer_head *buf, struct buffer_head
  117. *parent, int parent_slot, struct buffer_head
  118. **cow_ret)
  119. {
  120. u64 search_start;
  121. if (trans->transaction != root->fs_info->running_transaction) {
  122. printk(KERN_CRIT "trans %Lu running %Lu\n", trans->transid,
  123. root->fs_info->running_transaction->transid);
  124. WARN_ON(1);
  125. }
  126. if (trans->transid != root->fs_info->generation) {
  127. printk(KERN_CRIT "trans %Lu running %Lu\n", trans->transid,
  128. root->fs_info->generation);
  129. WARN_ON(1);
  130. }
  131. if (btrfs_header_generation(btrfs_buffer_header(buf)) ==
  132. trans->transid) {
  133. *cow_ret = buf;
  134. return 0;
  135. }
  136. search_start = bh_blocknr(buf) & ~((u64)65535);
  137. return __btrfs_cow_block(trans, root, buf, parent,
  138. parent_slot, cow_ret, search_start, 0);
  139. }
  140. static int close_blocks(u64 blocknr, u64 other)
  141. {
  142. if (blocknr < other && other - blocknr < 8)
  143. return 1;
  144. if (blocknr > other && blocknr - other < 8)
  145. return 1;
  146. return 0;
  147. }
  148. static int should_defrag_leaf(struct buffer_head *bh)
  149. {
  150. struct btrfs_leaf *leaf = btrfs_buffer_leaf(bh);
  151. struct btrfs_disk_key *key;
  152. u32 nritems;
  153. if (buffer_defrag(bh))
  154. return 1;
  155. nritems = btrfs_header_nritems(&leaf->header);
  156. if (nritems == 0)
  157. return 0;
  158. key = &leaf->items[0].key;
  159. if (btrfs_disk_key_type(key) == BTRFS_DIR_ITEM_KEY)
  160. return 1;
  161. key = &leaf->items[nritems-1].key;
  162. if (btrfs_disk_key_type(key) == BTRFS_DIR_ITEM_KEY)
  163. return 1;
  164. if (nritems > 4) {
  165. key = &leaf->items[nritems/2].key;
  166. if (btrfs_disk_key_type(key) == BTRFS_DIR_ITEM_KEY)
  167. return 1;
  168. }
  169. return 0;
  170. }
  171. int btrfs_realloc_node(struct btrfs_trans_handle *trans,
  172. struct btrfs_root *root, struct buffer_head *parent,
  173. int cache_only, u64 *last_ret)
  174. {
  175. struct btrfs_node *parent_node;
  176. struct buffer_head *cur_bh;
  177. struct buffer_head *tmp_bh;
  178. u64 blocknr;
  179. u64 search_start = *last_ret;
  180. u64 last_block = 0;
  181. u64 other;
  182. u32 parent_nritems;
  183. int start_slot;
  184. int end_slot;
  185. int i;
  186. int err = 0;
  187. int parent_level;
  188. if (trans->transaction != root->fs_info->running_transaction) {
  189. printk(KERN_CRIT "trans %Lu running %Lu\n", trans->transid,
  190. root->fs_info->running_transaction->transid);
  191. WARN_ON(1);
  192. }
  193. if (trans->transid != root->fs_info->generation) {
  194. printk(KERN_CRIT "trans %Lu running %Lu\n", trans->transid,
  195. root->fs_info->generation);
  196. WARN_ON(1);
  197. }
  198. parent_node = btrfs_buffer_node(parent);
  199. parent_nritems = btrfs_header_nritems(&parent_node->header);
  200. parent_level = btrfs_header_level(&parent_node->header);
  201. start_slot = 0;
  202. end_slot = parent_nritems;
  203. if (parent_nritems == 1)
  204. return 0;
  205. for (i = start_slot; i < end_slot; i++) {
  206. int close = 1;
  207. blocknr = btrfs_node_blockptr(parent_node, i);
  208. if (last_block == 0)
  209. last_block = blocknr;
  210. if (i > 0) {
  211. other = btrfs_node_blockptr(parent_node, i - 1);
  212. close = close_blocks(blocknr, other);
  213. }
  214. if (close && i < end_slot - 1) {
  215. other = btrfs_node_blockptr(parent_node, i + 1);
  216. close = close_blocks(blocknr, other);
  217. }
  218. if (close) {
  219. last_block = blocknr;
  220. continue;
  221. }
  222. cur_bh = btrfs_find_tree_block(root, blocknr);
  223. if (!cur_bh || !buffer_uptodate(cur_bh) ||
  224. buffer_locked(cur_bh) ||
  225. (parent_level != 1 && !buffer_defrag(cur_bh)) ||
  226. (parent_level == 1 && !should_defrag_leaf(cur_bh))) {
  227. if (cache_only) {
  228. brelse(cur_bh);
  229. continue;
  230. }
  231. if (!cur_bh || !buffer_uptodate(cur_bh) ||
  232. buffer_locked(cur_bh)) {
  233. brelse(cur_bh);
  234. cur_bh = read_tree_block(root, blocknr);
  235. }
  236. }
  237. if (search_start == 0)
  238. search_start = last_block & ~((u64)65535);
  239. err = __btrfs_cow_block(trans, root, cur_bh, parent, i,
  240. &tmp_bh, search_start,
  241. min(8, end_slot - i));
  242. if (err) {
  243. brelse(cur_bh);
  244. break;
  245. }
  246. search_start = bh_blocknr(tmp_bh);
  247. *last_ret = search_start;
  248. if (parent_level == 1)
  249. clear_buffer_defrag(tmp_bh);
  250. brelse(tmp_bh);
  251. }
  252. return err;
  253. }
  254. /*
  255. * The leaf data grows from end-to-front in the node.
  256. * this returns the address of the start of the last item,
  257. * which is the stop of the leaf data stack
  258. */
  259. static inline unsigned int leaf_data_end(struct btrfs_root *root,
  260. struct btrfs_leaf *leaf)
  261. {
  262. u32 nr = btrfs_header_nritems(&leaf->header);
  263. if (nr == 0)
  264. return BTRFS_LEAF_DATA_SIZE(root);
  265. return btrfs_item_offset(leaf->items + nr - 1);
  266. }
  267. /*
  268. * compare two keys in a memcmp fashion
  269. */
  270. static int comp_keys(struct btrfs_disk_key *disk, struct btrfs_key *k2)
  271. {
  272. struct btrfs_key k1;
  273. btrfs_disk_key_to_cpu(&k1, disk);
  274. if (k1.objectid > k2->objectid)
  275. return 1;
  276. if (k1.objectid < k2->objectid)
  277. return -1;
  278. if (k1.flags > k2->flags)
  279. return 1;
  280. if (k1.flags < k2->flags)
  281. return -1;
  282. if (k1.offset > k2->offset)
  283. return 1;
  284. if (k1.offset < k2->offset)
  285. return -1;
  286. return 0;
  287. }
  288. static int check_node(struct btrfs_root *root, struct btrfs_path *path,
  289. int level)
  290. {
  291. struct btrfs_node *parent = NULL;
  292. struct btrfs_node *node = btrfs_buffer_node(path->nodes[level]);
  293. int parent_slot;
  294. int slot;
  295. struct btrfs_key cpukey;
  296. u32 nritems = btrfs_header_nritems(&node->header);
  297. if (path->nodes[level + 1])
  298. parent = btrfs_buffer_node(path->nodes[level + 1]);
  299. slot = path->slots[level];
  300. BUG_ON(!buffer_uptodate(path->nodes[level]));
  301. BUG_ON(nritems == 0);
  302. if (parent) {
  303. struct btrfs_disk_key *parent_key;
  304. parent_slot = path->slots[level + 1];
  305. parent_key = &parent->ptrs[parent_slot].key;
  306. BUG_ON(memcmp(parent_key, &node->ptrs[0].key,
  307. sizeof(struct btrfs_disk_key)));
  308. BUG_ON(btrfs_node_blockptr(parent, parent_slot) !=
  309. btrfs_header_blocknr(&node->header));
  310. }
  311. BUG_ON(nritems > BTRFS_NODEPTRS_PER_BLOCK(root));
  312. if (slot != 0) {
  313. btrfs_disk_key_to_cpu(&cpukey, &node->ptrs[slot - 1].key);
  314. BUG_ON(comp_keys(&node->ptrs[slot].key, &cpukey) <= 0);
  315. }
  316. if (slot < nritems - 1) {
  317. btrfs_disk_key_to_cpu(&cpukey, &node->ptrs[slot + 1].key);
  318. BUG_ON(comp_keys(&node->ptrs[slot].key, &cpukey) >= 0);
  319. }
  320. return 0;
  321. }
  322. static int check_leaf(struct btrfs_root *root, struct btrfs_path *path,
  323. int level)
  324. {
  325. struct btrfs_leaf *leaf = btrfs_buffer_leaf(path->nodes[level]);
  326. struct btrfs_node *parent = NULL;
  327. int parent_slot;
  328. int slot = path->slots[0];
  329. struct btrfs_key cpukey;
  330. u32 nritems = btrfs_header_nritems(&leaf->header);
  331. if (path->nodes[level + 1])
  332. parent = btrfs_buffer_node(path->nodes[level + 1]);
  333. BUG_ON(btrfs_leaf_free_space(root, leaf) < 0);
  334. if (nritems == 0)
  335. return 0;
  336. if (parent) {
  337. struct btrfs_disk_key *parent_key;
  338. parent_slot = path->slots[level + 1];
  339. parent_key = &parent->ptrs[parent_slot].key;
  340. BUG_ON(memcmp(parent_key, &leaf->items[0].key,
  341. sizeof(struct btrfs_disk_key)));
  342. BUG_ON(btrfs_node_blockptr(parent, parent_slot) !=
  343. btrfs_header_blocknr(&leaf->header));
  344. }
  345. if (slot != 0) {
  346. btrfs_disk_key_to_cpu(&cpukey, &leaf->items[slot - 1].key);
  347. BUG_ON(comp_keys(&leaf->items[slot].key, &cpukey) <= 0);
  348. BUG_ON(btrfs_item_offset(leaf->items + slot - 1) !=
  349. btrfs_item_end(leaf->items + slot));
  350. }
  351. if (slot < nritems - 1) {
  352. btrfs_disk_key_to_cpu(&cpukey, &leaf->items[slot + 1].key);
  353. BUG_ON(comp_keys(&leaf->items[slot].key, &cpukey) >= 0);
  354. BUG_ON(btrfs_item_offset(leaf->items + slot) !=
  355. btrfs_item_end(leaf->items + slot + 1));
  356. }
  357. BUG_ON(btrfs_item_offset(leaf->items) +
  358. btrfs_item_size(leaf->items) != BTRFS_LEAF_DATA_SIZE(root));
  359. return 0;
  360. }
  361. static int check_block(struct btrfs_root *root, struct btrfs_path *path,
  362. int level)
  363. {
  364. struct btrfs_node *node = btrfs_buffer_node(path->nodes[level]);
  365. if (memcmp(node->header.fsid, root->fs_info->disk_super->fsid,
  366. sizeof(node->header.fsid)))
  367. BUG();
  368. if (level == 0)
  369. return check_leaf(root, path, level);
  370. return check_node(root, path, level);
  371. }
  372. /*
  373. * search for key in the array p. items p are item_size apart
  374. * and there are 'max' items in p
  375. * the slot in the array is returned via slot, and it points to
  376. * the place where you would insert key if it is not found in
  377. * the array.
  378. *
  379. * slot may point to max if the key is bigger than all of the keys
  380. */
  381. static int generic_bin_search(char *p, int item_size, struct btrfs_key *key,
  382. int max, int *slot)
  383. {
  384. int low = 0;
  385. int high = max;
  386. int mid;
  387. int ret;
  388. struct btrfs_disk_key *tmp;
  389. while(low < high) {
  390. mid = (low + high) / 2;
  391. tmp = (struct btrfs_disk_key *)(p + mid * item_size);
  392. ret = comp_keys(tmp, key);
  393. if (ret < 0)
  394. low = mid + 1;
  395. else if (ret > 0)
  396. high = mid;
  397. else {
  398. *slot = mid;
  399. return 0;
  400. }
  401. }
  402. *slot = low;
  403. return 1;
  404. }
  405. /*
  406. * simple bin_search frontend that does the right thing for
  407. * leaves vs nodes
  408. */
  409. static int bin_search(struct btrfs_node *c, struct btrfs_key *key, int *slot)
  410. {
  411. if (btrfs_is_leaf(c)) {
  412. struct btrfs_leaf *l = (struct btrfs_leaf *)c;
  413. return generic_bin_search((void *)l->items,
  414. sizeof(struct btrfs_item),
  415. key, btrfs_header_nritems(&c->header),
  416. slot);
  417. } else {
  418. return generic_bin_search((void *)c->ptrs,
  419. sizeof(struct btrfs_key_ptr),
  420. key, btrfs_header_nritems(&c->header),
  421. slot);
  422. }
  423. return -1;
  424. }
  425. static struct buffer_head *read_node_slot(struct btrfs_root *root,
  426. struct buffer_head *parent_buf,
  427. int slot)
  428. {
  429. struct btrfs_node *node = btrfs_buffer_node(parent_buf);
  430. if (slot < 0)
  431. return NULL;
  432. if (slot >= btrfs_header_nritems(&node->header))
  433. return NULL;
  434. return read_tree_block(root, btrfs_node_blockptr(node, slot));
  435. }
  436. static int balance_level(struct btrfs_trans_handle *trans, struct btrfs_root
  437. *root, struct btrfs_path *path, int level)
  438. {
  439. struct buffer_head *right_buf;
  440. struct buffer_head *mid_buf;
  441. struct buffer_head *left_buf;
  442. struct buffer_head *parent_buf = NULL;
  443. struct btrfs_node *right = NULL;
  444. struct btrfs_node *mid;
  445. struct btrfs_node *left = NULL;
  446. struct btrfs_node *parent = NULL;
  447. int ret = 0;
  448. int wret;
  449. int pslot;
  450. int orig_slot = path->slots[level];
  451. int err_on_enospc = 0;
  452. u64 orig_ptr;
  453. if (level == 0)
  454. return 0;
  455. mid_buf = path->nodes[level];
  456. mid = btrfs_buffer_node(mid_buf);
  457. orig_ptr = btrfs_node_blockptr(mid, orig_slot);
  458. if (level < BTRFS_MAX_LEVEL - 1)
  459. parent_buf = path->nodes[level + 1];
  460. pslot = path->slots[level + 1];
  461. /*
  462. * deal with the case where there is only one pointer in the root
  463. * by promoting the node below to a root
  464. */
  465. if (!parent_buf) {
  466. struct buffer_head *child;
  467. u64 blocknr = bh_blocknr(mid_buf);
  468. if (btrfs_header_nritems(&mid->header) != 1)
  469. return 0;
  470. /* promote the child to a root */
  471. child = read_node_slot(root, mid_buf, 0);
  472. BUG_ON(!child);
  473. root->node = child;
  474. path->nodes[level] = NULL;
  475. clean_tree_block(trans, root, mid_buf);
  476. wait_on_buffer(mid_buf);
  477. /* once for the path */
  478. btrfs_block_release(root, mid_buf);
  479. /* once for the root ptr */
  480. btrfs_block_release(root, mid_buf);
  481. return btrfs_free_extent(trans, root, blocknr, 1, 1);
  482. }
  483. parent = btrfs_buffer_node(parent_buf);
  484. if (btrfs_header_nritems(&mid->header) >
  485. BTRFS_NODEPTRS_PER_BLOCK(root) / 4)
  486. return 0;
  487. if (btrfs_header_nritems(&mid->header) < 2)
  488. err_on_enospc = 1;
  489. left_buf = read_node_slot(root, parent_buf, pslot - 1);
  490. if (left_buf) {
  491. wret = btrfs_cow_block(trans, root, left_buf,
  492. parent_buf, pslot - 1, &left_buf);
  493. if (wret) {
  494. ret = wret;
  495. goto enospc;
  496. }
  497. }
  498. right_buf = read_node_slot(root, parent_buf, pslot + 1);
  499. if (right_buf) {
  500. wret = btrfs_cow_block(trans, root, right_buf,
  501. parent_buf, pslot + 1, &right_buf);
  502. if (wret) {
  503. ret = wret;
  504. goto enospc;
  505. }
  506. }
  507. /* first, try to make some room in the middle buffer */
  508. if (left_buf) {
  509. left = btrfs_buffer_node(left_buf);
  510. orig_slot += btrfs_header_nritems(&left->header);
  511. wret = push_node_left(trans, root, left_buf, mid_buf);
  512. if (wret < 0)
  513. ret = wret;
  514. if (btrfs_header_nritems(&mid->header) < 2)
  515. err_on_enospc = 1;
  516. }
  517. /*
  518. * then try to empty the right most buffer into the middle
  519. */
  520. if (right_buf) {
  521. right = btrfs_buffer_node(right_buf);
  522. wret = push_node_left(trans, root, mid_buf, right_buf);
  523. if (wret < 0 && wret != -ENOSPC)
  524. ret = wret;
  525. if (btrfs_header_nritems(&right->header) == 0) {
  526. u64 blocknr = bh_blocknr(right_buf);
  527. clean_tree_block(trans, root, right_buf);
  528. wait_on_buffer(right_buf);
  529. btrfs_block_release(root, right_buf);
  530. right_buf = NULL;
  531. right = NULL;
  532. wret = del_ptr(trans, root, path, level + 1, pslot +
  533. 1);
  534. if (wret)
  535. ret = wret;
  536. wret = btrfs_free_extent(trans, root, blocknr, 1, 1);
  537. if (wret)
  538. ret = wret;
  539. } else {
  540. btrfs_memcpy(root, parent,
  541. &parent->ptrs[pslot + 1].key,
  542. &right->ptrs[0].key,
  543. sizeof(struct btrfs_disk_key));
  544. btrfs_mark_buffer_dirty(parent_buf);
  545. }
  546. }
  547. if (btrfs_header_nritems(&mid->header) == 1) {
  548. /*
  549. * we're not allowed to leave a node with one item in the
  550. * tree during a delete. A deletion from lower in the tree
  551. * could try to delete the only pointer in this node.
  552. * So, pull some keys from the left.
  553. * There has to be a left pointer at this point because
  554. * otherwise we would have pulled some pointers from the
  555. * right
  556. */
  557. BUG_ON(!left_buf);
  558. wret = balance_node_right(trans, root, mid_buf, left_buf);
  559. if (wret < 0) {
  560. ret = wret;
  561. goto enospc;
  562. }
  563. BUG_ON(wret == 1);
  564. }
  565. if (btrfs_header_nritems(&mid->header) == 0) {
  566. /* we've managed to empty the middle node, drop it */
  567. u64 blocknr = bh_blocknr(mid_buf);
  568. clean_tree_block(trans, root, mid_buf);
  569. wait_on_buffer(mid_buf);
  570. btrfs_block_release(root, mid_buf);
  571. mid_buf = NULL;
  572. mid = NULL;
  573. wret = del_ptr(trans, root, path, level + 1, pslot);
  574. if (wret)
  575. ret = wret;
  576. wret = btrfs_free_extent(trans, root, blocknr, 1, 1);
  577. if (wret)
  578. ret = wret;
  579. } else {
  580. /* update the parent key to reflect our changes */
  581. btrfs_memcpy(root, parent,
  582. &parent->ptrs[pslot].key, &mid->ptrs[0].key,
  583. sizeof(struct btrfs_disk_key));
  584. btrfs_mark_buffer_dirty(parent_buf);
  585. }
  586. /* update the path */
  587. if (left_buf) {
  588. if (btrfs_header_nritems(&left->header) > orig_slot) {
  589. get_bh(left_buf);
  590. path->nodes[level] = left_buf;
  591. path->slots[level + 1] -= 1;
  592. path->slots[level] = orig_slot;
  593. if (mid_buf)
  594. btrfs_block_release(root, mid_buf);
  595. } else {
  596. orig_slot -= btrfs_header_nritems(&left->header);
  597. path->slots[level] = orig_slot;
  598. }
  599. }
  600. /* double check we haven't messed things up */
  601. check_block(root, path, level);
  602. if (orig_ptr !=
  603. btrfs_node_blockptr(btrfs_buffer_node(path->nodes[level]),
  604. path->slots[level]))
  605. BUG();
  606. enospc:
  607. if (right_buf)
  608. btrfs_block_release(root, right_buf);
  609. if (left_buf)
  610. btrfs_block_release(root, left_buf);
  611. return ret;
  612. }
  613. /* returns zero if the push worked, non-zero otherwise */
  614. static int push_nodes_for_insert(struct btrfs_trans_handle *trans,
  615. struct btrfs_root *root,
  616. struct btrfs_path *path, int level)
  617. {
  618. struct buffer_head *right_buf;
  619. struct buffer_head *mid_buf;
  620. struct buffer_head *left_buf;
  621. struct buffer_head *parent_buf = NULL;
  622. struct btrfs_node *right = NULL;
  623. struct btrfs_node *mid;
  624. struct btrfs_node *left = NULL;
  625. struct btrfs_node *parent = NULL;
  626. int ret = 0;
  627. int wret;
  628. int pslot;
  629. int orig_slot = path->slots[level];
  630. u64 orig_ptr;
  631. if (level == 0)
  632. return 1;
  633. mid_buf = path->nodes[level];
  634. mid = btrfs_buffer_node(mid_buf);
  635. orig_ptr = btrfs_node_blockptr(mid, orig_slot);
  636. if (level < BTRFS_MAX_LEVEL - 1)
  637. parent_buf = path->nodes[level + 1];
  638. pslot = path->slots[level + 1];
  639. if (!parent_buf)
  640. return 1;
  641. parent = btrfs_buffer_node(parent_buf);
  642. left_buf = read_node_slot(root, parent_buf, pslot - 1);
  643. /* first, try to make some room in the middle buffer */
  644. if (left_buf) {
  645. u32 left_nr;
  646. left = btrfs_buffer_node(left_buf);
  647. left_nr = btrfs_header_nritems(&left->header);
  648. if (left_nr >= BTRFS_NODEPTRS_PER_BLOCK(root) - 1) {
  649. wret = 1;
  650. } else {
  651. ret = btrfs_cow_block(trans, root, left_buf, parent_buf,
  652. pslot - 1, &left_buf);
  653. if (ret)
  654. wret = 1;
  655. else {
  656. left = btrfs_buffer_node(left_buf);
  657. wret = push_node_left(trans, root,
  658. left_buf, mid_buf);
  659. }
  660. }
  661. if (wret < 0)
  662. ret = wret;
  663. if (wret == 0) {
  664. orig_slot += left_nr;
  665. btrfs_memcpy(root, parent,
  666. &parent->ptrs[pslot].key,
  667. &mid->ptrs[0].key,
  668. sizeof(struct btrfs_disk_key));
  669. btrfs_mark_buffer_dirty(parent_buf);
  670. if (btrfs_header_nritems(&left->header) > orig_slot) {
  671. path->nodes[level] = left_buf;
  672. path->slots[level + 1] -= 1;
  673. path->slots[level] = orig_slot;
  674. btrfs_block_release(root, mid_buf);
  675. } else {
  676. orig_slot -=
  677. btrfs_header_nritems(&left->header);
  678. path->slots[level] = orig_slot;
  679. btrfs_block_release(root, left_buf);
  680. }
  681. check_node(root, path, level);
  682. return 0;
  683. }
  684. btrfs_block_release(root, left_buf);
  685. }
  686. right_buf = read_node_slot(root, parent_buf, pslot + 1);
  687. /*
  688. * then try to empty the right most buffer into the middle
  689. */
  690. if (right_buf) {
  691. u32 right_nr;
  692. right = btrfs_buffer_node(right_buf);
  693. right_nr = btrfs_header_nritems(&right->header);
  694. if (right_nr >= BTRFS_NODEPTRS_PER_BLOCK(root) - 1) {
  695. wret = 1;
  696. } else {
  697. ret = btrfs_cow_block(trans, root, right_buf,
  698. parent_buf, pslot + 1,
  699. &right_buf);
  700. if (ret)
  701. wret = 1;
  702. else {
  703. right = btrfs_buffer_node(right_buf);
  704. wret = balance_node_right(trans, root,
  705. right_buf, mid_buf);
  706. }
  707. }
  708. if (wret < 0)
  709. ret = wret;
  710. if (wret == 0) {
  711. btrfs_memcpy(root, parent,
  712. &parent->ptrs[pslot + 1].key,
  713. &right->ptrs[0].key,
  714. sizeof(struct btrfs_disk_key));
  715. btrfs_mark_buffer_dirty(parent_buf);
  716. if (btrfs_header_nritems(&mid->header) <= orig_slot) {
  717. path->nodes[level] = right_buf;
  718. path->slots[level + 1] += 1;
  719. path->slots[level] = orig_slot -
  720. btrfs_header_nritems(&mid->header);
  721. btrfs_block_release(root, mid_buf);
  722. } else {
  723. btrfs_block_release(root, right_buf);
  724. }
  725. check_node(root, path, level);
  726. return 0;
  727. }
  728. btrfs_block_release(root, right_buf);
  729. }
  730. check_node(root, path, level);
  731. return 1;
  732. }
  733. /*
  734. * readahead one full node of leaves
  735. */
  736. static void reada_for_search(struct btrfs_root *root, struct btrfs_path *path,
  737. int level, int slot)
  738. {
  739. struct btrfs_node *node;
  740. int i;
  741. u32 nritems;
  742. u64 item_objectid;
  743. u64 blocknr;
  744. u64 search;
  745. u64 cluster_start;
  746. int ret;
  747. int nread = 0;
  748. int direction = path->reada;
  749. struct radix_tree_root found;
  750. unsigned long gang[8];
  751. struct buffer_head *bh;
  752. if (level == 0)
  753. return;
  754. if (!path->nodes[level])
  755. return;
  756. node = btrfs_buffer_node(path->nodes[level]);
  757. search = btrfs_node_blockptr(node, slot);
  758. bh = btrfs_find_tree_block(root, search);
  759. if (bh) {
  760. brelse(bh);
  761. return;
  762. }
  763. init_bit_radix(&found);
  764. nritems = btrfs_header_nritems(&node->header);
  765. for (i = slot; i < nritems; i++) {
  766. item_objectid = btrfs_disk_key_objectid(&node->ptrs[i].key);
  767. blocknr = btrfs_node_blockptr(node, i);
  768. set_radix_bit(&found, blocknr);
  769. }
  770. if (direction > 0) {
  771. cluster_start = search - 4;
  772. if (cluster_start > search)
  773. cluster_start = 0;
  774. } else
  775. cluster_start = search + 4;
  776. while(1) {
  777. ret = find_first_radix_bit(&found, gang, 0, ARRAY_SIZE(gang));
  778. if (!ret)
  779. break;
  780. for (i = 0; i < ret; i++) {
  781. blocknr = gang[i];
  782. clear_radix_bit(&found, blocknr);
  783. if (path->reada == 1 && nread > 16)
  784. continue;
  785. if (close_blocks(cluster_start, blocknr)) {
  786. readahead_tree_block(root, blocknr);
  787. nread++;
  788. cluster_start = blocknr;
  789. }
  790. }
  791. }
  792. }
  793. /*
  794. * look for key in the tree. path is filled in with nodes along the way
  795. * if key is found, we return zero and you can find the item in the leaf
  796. * level of the path (level 0)
  797. *
  798. * If the key isn't found, the path points to the slot where it should
  799. * be inserted, and 1 is returned. If there are other errors during the
  800. * search a negative error number is returned.
  801. *
  802. * if ins_len > 0, nodes and leaves will be split as we walk down the
  803. * tree. if ins_len < 0, nodes will be merged as we walk down the tree (if
  804. * possible)
  805. */
  806. int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root
  807. *root, struct btrfs_key *key, struct btrfs_path *p, int
  808. ins_len, int cow)
  809. {
  810. struct buffer_head *b;
  811. struct btrfs_node *c;
  812. u64 blocknr;
  813. int slot;
  814. int ret;
  815. int level;
  816. int should_reada = p->reada;
  817. u8 lowest_level = 0;
  818. lowest_level = p->lowest_level;
  819. WARN_ON(lowest_level && ins_len);
  820. WARN_ON(p->nodes[0] != NULL);
  821. WARN_ON(!mutex_is_locked(&root->fs_info->fs_mutex));
  822. again:
  823. b = root->node;
  824. get_bh(b);
  825. while (b) {
  826. c = btrfs_buffer_node(b);
  827. level = btrfs_header_level(&c->header);
  828. if (cow) {
  829. int wret;
  830. wret = btrfs_cow_block(trans, root, b,
  831. p->nodes[level + 1],
  832. p->slots[level + 1],
  833. &b);
  834. if (wret) {
  835. btrfs_block_release(root, b);
  836. return wret;
  837. }
  838. c = btrfs_buffer_node(b);
  839. }
  840. BUG_ON(!cow && ins_len);
  841. if (level != btrfs_header_level(&c->header))
  842. WARN_ON(1);
  843. level = btrfs_header_level(&c->header);
  844. p->nodes[level] = b;
  845. ret = check_block(root, p, level);
  846. if (ret)
  847. return -1;
  848. ret = bin_search(c, key, &slot);
  849. if (!btrfs_is_leaf(c)) {
  850. if (ret && slot > 0)
  851. slot -= 1;
  852. p->slots[level] = slot;
  853. if (ins_len > 0 && btrfs_header_nritems(&c->header) >=
  854. BTRFS_NODEPTRS_PER_BLOCK(root) - 1) {
  855. int sret = split_node(trans, root, p, level);
  856. BUG_ON(sret > 0);
  857. if (sret)
  858. return sret;
  859. b = p->nodes[level];
  860. c = btrfs_buffer_node(b);
  861. slot = p->slots[level];
  862. } else if (ins_len < 0) {
  863. int sret = balance_level(trans, root, p,
  864. level);
  865. if (sret)
  866. return sret;
  867. b = p->nodes[level];
  868. if (!b)
  869. goto again;
  870. c = btrfs_buffer_node(b);
  871. slot = p->slots[level];
  872. BUG_ON(btrfs_header_nritems(&c->header) == 1);
  873. }
  874. /* this is only true while dropping a snapshot */
  875. if (level == lowest_level)
  876. break;
  877. blocknr = btrfs_node_blockptr(c, slot);
  878. if (should_reada)
  879. reada_for_search(root, p, level, slot);
  880. b = read_tree_block(root, btrfs_node_blockptr(c, slot));
  881. } else {
  882. struct btrfs_leaf *l = (struct btrfs_leaf *)c;
  883. p->slots[level] = slot;
  884. if (ins_len > 0 && btrfs_leaf_free_space(root, l) <
  885. sizeof(struct btrfs_item) + ins_len) {
  886. int sret = split_leaf(trans, root, key,
  887. p, ins_len);
  888. BUG_ON(sret > 0);
  889. if (sret)
  890. return sret;
  891. }
  892. return ret;
  893. }
  894. }
  895. return 1;
  896. }
  897. /*
  898. * adjust the pointers going up the tree, starting at level
  899. * making sure the right key of each node is points to 'key'.
  900. * This is used after shifting pointers to the left, so it stops
  901. * fixing up pointers when a given leaf/node is not in slot 0 of the
  902. * higher levels
  903. *
  904. * If this fails to write a tree block, it returns -1, but continues
  905. * fixing up the blocks in ram so the tree is consistent.
  906. */
  907. static int fixup_low_keys(struct btrfs_trans_handle *trans, struct btrfs_root
  908. *root, struct btrfs_path *path, struct btrfs_disk_key
  909. *key, int level)
  910. {
  911. int i;
  912. int ret = 0;
  913. for (i = level; i < BTRFS_MAX_LEVEL; i++) {
  914. struct btrfs_node *t;
  915. int tslot = path->slots[i];
  916. if (!path->nodes[i])
  917. break;
  918. t = btrfs_buffer_node(path->nodes[i]);
  919. btrfs_memcpy(root, t, &t->ptrs[tslot].key, key, sizeof(*key));
  920. btrfs_mark_buffer_dirty(path->nodes[i]);
  921. if (tslot != 0)
  922. break;
  923. }
  924. return ret;
  925. }
  926. /*
  927. * try to push data from one node into the next node left in the
  928. * tree.
  929. *
  930. * returns 0 if some ptrs were pushed left, < 0 if there was some horrible
  931. * error, and > 0 if there was no room in the left hand block.
  932. */
  933. static int push_node_left(struct btrfs_trans_handle *trans, struct btrfs_root
  934. *root, struct buffer_head *dst_buf, struct
  935. buffer_head *src_buf)
  936. {
  937. struct btrfs_node *src = btrfs_buffer_node(src_buf);
  938. struct btrfs_node *dst = btrfs_buffer_node(dst_buf);
  939. int push_items = 0;
  940. int src_nritems;
  941. int dst_nritems;
  942. int ret = 0;
  943. src_nritems = btrfs_header_nritems(&src->header);
  944. dst_nritems = btrfs_header_nritems(&dst->header);
  945. push_items = BTRFS_NODEPTRS_PER_BLOCK(root) - dst_nritems;
  946. if (push_items <= 0) {
  947. return 1;
  948. }
  949. if (src_nritems < push_items)
  950. push_items = src_nritems;
  951. btrfs_memcpy(root, dst, dst->ptrs + dst_nritems, src->ptrs,
  952. push_items * sizeof(struct btrfs_key_ptr));
  953. if (push_items < src_nritems) {
  954. btrfs_memmove(root, src, src->ptrs, src->ptrs + push_items,
  955. (src_nritems - push_items) *
  956. sizeof(struct btrfs_key_ptr));
  957. }
  958. btrfs_set_header_nritems(&src->header, src_nritems - push_items);
  959. btrfs_set_header_nritems(&dst->header, dst_nritems + push_items);
  960. btrfs_mark_buffer_dirty(src_buf);
  961. btrfs_mark_buffer_dirty(dst_buf);
  962. return ret;
  963. }
  964. /*
  965. * try to push data from one node into the next node right in the
  966. * tree.
  967. *
  968. * returns 0 if some ptrs were pushed, < 0 if there was some horrible
  969. * error, and > 0 if there was no room in the right hand block.
  970. *
  971. * this will only push up to 1/2 the contents of the left node over
  972. */
  973. static int balance_node_right(struct btrfs_trans_handle *trans, struct
  974. btrfs_root *root, struct buffer_head *dst_buf,
  975. struct buffer_head *src_buf)
  976. {
  977. struct btrfs_node *src = btrfs_buffer_node(src_buf);
  978. struct btrfs_node *dst = btrfs_buffer_node(dst_buf);
  979. int push_items = 0;
  980. int max_push;
  981. int src_nritems;
  982. int dst_nritems;
  983. int ret = 0;
  984. src_nritems = btrfs_header_nritems(&src->header);
  985. dst_nritems = btrfs_header_nritems(&dst->header);
  986. push_items = BTRFS_NODEPTRS_PER_BLOCK(root) - dst_nritems;
  987. if (push_items <= 0) {
  988. return 1;
  989. }
  990. max_push = src_nritems / 2 + 1;
  991. /* don't try to empty the node */
  992. if (max_push >= src_nritems)
  993. return 1;
  994. if (max_push < push_items)
  995. push_items = max_push;
  996. btrfs_memmove(root, dst, dst->ptrs + push_items, dst->ptrs,
  997. dst_nritems * sizeof(struct btrfs_key_ptr));
  998. btrfs_memcpy(root, dst, dst->ptrs,
  999. src->ptrs + src_nritems - push_items,
  1000. push_items * sizeof(struct btrfs_key_ptr));
  1001. btrfs_set_header_nritems(&src->header, src_nritems - push_items);
  1002. btrfs_set_header_nritems(&dst->header, dst_nritems + push_items);
  1003. btrfs_mark_buffer_dirty(src_buf);
  1004. btrfs_mark_buffer_dirty(dst_buf);
  1005. return ret;
  1006. }
  1007. /*
  1008. * helper function to insert a new root level in the tree.
  1009. * A new node is allocated, and a single item is inserted to
  1010. * point to the existing root
  1011. *
  1012. * returns zero on success or < 0 on failure.
  1013. */
  1014. static int insert_new_root(struct btrfs_trans_handle *trans, struct btrfs_root
  1015. *root, struct btrfs_path *path, int level)
  1016. {
  1017. struct buffer_head *t;
  1018. struct btrfs_node *lower;
  1019. struct btrfs_node *c;
  1020. struct btrfs_disk_key *lower_key;
  1021. BUG_ON(path->nodes[level]);
  1022. BUG_ON(path->nodes[level-1] != root->node);
  1023. t = btrfs_alloc_free_block(trans, root, root->node->b_blocknr, 0);
  1024. if (IS_ERR(t))
  1025. return PTR_ERR(t);
  1026. c = btrfs_buffer_node(t);
  1027. memset(c, 0, root->blocksize);
  1028. btrfs_set_header_nritems(&c->header, 1);
  1029. btrfs_set_header_level(&c->header, level);
  1030. btrfs_set_header_blocknr(&c->header, bh_blocknr(t));
  1031. btrfs_set_header_generation(&c->header, trans->transid);
  1032. btrfs_set_header_owner(&c->header, root->root_key.objectid);
  1033. lower = btrfs_buffer_node(path->nodes[level-1]);
  1034. memcpy(c->header.fsid, root->fs_info->disk_super->fsid,
  1035. sizeof(c->header.fsid));
  1036. if (btrfs_is_leaf(lower))
  1037. lower_key = &((struct btrfs_leaf *)lower)->items[0].key;
  1038. else
  1039. lower_key = &lower->ptrs[0].key;
  1040. btrfs_memcpy(root, c, &c->ptrs[0].key, lower_key,
  1041. sizeof(struct btrfs_disk_key));
  1042. btrfs_set_node_blockptr(c, 0, bh_blocknr(path->nodes[level - 1]));
  1043. btrfs_mark_buffer_dirty(t);
  1044. /* the super has an extra ref to root->node */
  1045. btrfs_block_release(root, root->node);
  1046. root->node = t;
  1047. get_bh(t);
  1048. path->nodes[level] = t;
  1049. path->slots[level] = 0;
  1050. return 0;
  1051. }
  1052. /*
  1053. * worker function to insert a single pointer in a node.
  1054. * the node should have enough room for the pointer already
  1055. *
  1056. * slot and level indicate where you want the key to go, and
  1057. * blocknr is the block the key points to.
  1058. *
  1059. * returns zero on success and < 0 on any error
  1060. */
  1061. static int insert_ptr(struct btrfs_trans_handle *trans, struct btrfs_root
  1062. *root, struct btrfs_path *path, struct btrfs_disk_key
  1063. *key, u64 blocknr, int slot, int level)
  1064. {
  1065. struct btrfs_node *lower;
  1066. int nritems;
  1067. BUG_ON(!path->nodes[level]);
  1068. lower = btrfs_buffer_node(path->nodes[level]);
  1069. nritems = btrfs_header_nritems(&lower->header);
  1070. if (slot > nritems)
  1071. BUG();
  1072. if (nritems == BTRFS_NODEPTRS_PER_BLOCK(root))
  1073. BUG();
  1074. if (slot != nritems) {
  1075. btrfs_memmove(root, lower, lower->ptrs + slot + 1,
  1076. lower->ptrs + slot,
  1077. (nritems - slot) * sizeof(struct btrfs_key_ptr));
  1078. }
  1079. btrfs_memcpy(root, lower, &lower->ptrs[slot].key,
  1080. key, sizeof(struct btrfs_disk_key));
  1081. btrfs_set_node_blockptr(lower, slot, blocknr);
  1082. btrfs_set_header_nritems(&lower->header, nritems + 1);
  1083. btrfs_mark_buffer_dirty(path->nodes[level]);
  1084. check_node(root, path, level);
  1085. return 0;
  1086. }
  1087. /*
  1088. * split the node at the specified level in path in two.
  1089. * The path is corrected to point to the appropriate node after the split
  1090. *
  1091. * Before splitting this tries to make some room in the node by pushing
  1092. * left and right, if either one works, it returns right away.
  1093. *
  1094. * returns 0 on success and < 0 on failure
  1095. */
  1096. static int split_node(struct btrfs_trans_handle *trans, struct btrfs_root
  1097. *root, struct btrfs_path *path, int level)
  1098. {
  1099. struct buffer_head *t;
  1100. struct btrfs_node *c;
  1101. struct buffer_head *split_buffer;
  1102. struct btrfs_node *split;
  1103. int mid;
  1104. int ret;
  1105. int wret;
  1106. u32 c_nritems;
  1107. t = path->nodes[level];
  1108. c = btrfs_buffer_node(t);
  1109. if (t == root->node) {
  1110. /* trying to split the root, lets make a new one */
  1111. ret = insert_new_root(trans, root, path, level + 1);
  1112. if (ret)
  1113. return ret;
  1114. } else {
  1115. ret = push_nodes_for_insert(trans, root, path, level);
  1116. t = path->nodes[level];
  1117. c = btrfs_buffer_node(t);
  1118. if (!ret &&
  1119. btrfs_header_nritems(&c->header) <
  1120. BTRFS_NODEPTRS_PER_BLOCK(root) - 1)
  1121. return 0;
  1122. if (ret < 0)
  1123. return ret;
  1124. }
  1125. c_nritems = btrfs_header_nritems(&c->header);
  1126. split_buffer = btrfs_alloc_free_block(trans, root, t->b_blocknr, 0);
  1127. if (IS_ERR(split_buffer))
  1128. return PTR_ERR(split_buffer);
  1129. split = btrfs_buffer_node(split_buffer);
  1130. btrfs_set_header_flags(&split->header, btrfs_header_flags(&c->header));
  1131. btrfs_set_header_level(&split->header, btrfs_header_level(&c->header));
  1132. btrfs_set_header_blocknr(&split->header, bh_blocknr(split_buffer));
  1133. btrfs_set_header_generation(&split->header, trans->transid);
  1134. btrfs_set_header_owner(&split->header, root->root_key.objectid);
  1135. memcpy(split->header.fsid, root->fs_info->disk_super->fsid,
  1136. sizeof(split->header.fsid));
  1137. mid = (c_nritems + 1) / 2;
  1138. btrfs_memcpy(root, split, split->ptrs, c->ptrs + mid,
  1139. (c_nritems - mid) * sizeof(struct btrfs_key_ptr));
  1140. btrfs_set_header_nritems(&split->header, c_nritems - mid);
  1141. btrfs_set_header_nritems(&c->header, mid);
  1142. ret = 0;
  1143. btrfs_mark_buffer_dirty(t);
  1144. btrfs_mark_buffer_dirty(split_buffer);
  1145. wret = insert_ptr(trans, root, path, &split->ptrs[0].key,
  1146. bh_blocknr(split_buffer), path->slots[level + 1] + 1,
  1147. level + 1);
  1148. if (wret)
  1149. ret = wret;
  1150. if (path->slots[level] >= mid) {
  1151. path->slots[level] -= mid;
  1152. btrfs_block_release(root, t);
  1153. path->nodes[level] = split_buffer;
  1154. path->slots[level + 1] += 1;
  1155. } else {
  1156. btrfs_block_release(root, split_buffer);
  1157. }
  1158. return ret;
  1159. }
  1160. /*
  1161. * how many bytes are required to store the items in a leaf. start
  1162. * and nr indicate which items in the leaf to check. This totals up the
  1163. * space used both by the item structs and the item data
  1164. */
  1165. static int leaf_space_used(struct btrfs_leaf *l, int start, int nr)
  1166. {
  1167. int data_len;
  1168. int nritems = btrfs_header_nritems(&l->header);
  1169. int end = min(nritems, start + nr) - 1;
  1170. if (!nr)
  1171. return 0;
  1172. data_len = btrfs_item_end(l->items + start);
  1173. data_len = data_len - btrfs_item_offset(l->items + end);
  1174. data_len += sizeof(struct btrfs_item) * nr;
  1175. WARN_ON(data_len < 0);
  1176. return data_len;
  1177. }
  1178. /*
  1179. * The space between the end of the leaf items and
  1180. * the start of the leaf data. IOW, how much room
  1181. * the leaf has left for both items and data
  1182. */
  1183. int btrfs_leaf_free_space(struct btrfs_root *root, struct btrfs_leaf *leaf)
  1184. {
  1185. int nritems = btrfs_header_nritems(&leaf->header);
  1186. return BTRFS_LEAF_DATA_SIZE(root) - leaf_space_used(leaf, 0, nritems);
  1187. }
  1188. /*
  1189. * push some data in the path leaf to the right, trying to free up at
  1190. * least data_size bytes. returns zero if the push worked, nonzero otherwise
  1191. *
  1192. * returns 1 if the push failed because the other node didn't have enough
  1193. * room, 0 if everything worked out and < 0 if there were major errors.
  1194. */
  1195. static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root
  1196. *root, struct btrfs_path *path, int data_size)
  1197. {
  1198. struct buffer_head *left_buf = path->nodes[0];
  1199. struct btrfs_leaf *left = btrfs_buffer_leaf(left_buf);
  1200. struct btrfs_leaf *right;
  1201. struct buffer_head *right_buf;
  1202. struct buffer_head *upper;
  1203. struct btrfs_node *upper_node;
  1204. int slot;
  1205. int i;
  1206. int free_space;
  1207. int push_space = 0;
  1208. int push_items = 0;
  1209. struct btrfs_item *item;
  1210. u32 left_nritems;
  1211. u32 right_nritems;
  1212. int ret;
  1213. slot = path->slots[1];
  1214. if (!path->nodes[1]) {
  1215. return 1;
  1216. }
  1217. upper = path->nodes[1];
  1218. upper_node = btrfs_buffer_node(upper);
  1219. if (slot >= btrfs_header_nritems(&upper_node->header) - 1) {
  1220. return 1;
  1221. }
  1222. right_buf = read_tree_block(root,
  1223. btrfs_node_blockptr(btrfs_buffer_node(upper), slot + 1));
  1224. right = btrfs_buffer_leaf(right_buf);
  1225. free_space = btrfs_leaf_free_space(root, right);
  1226. if (free_space < data_size + sizeof(struct btrfs_item)) {
  1227. btrfs_block_release(root, right_buf);
  1228. return 1;
  1229. }
  1230. /* cow and double check */
  1231. ret = btrfs_cow_block(trans, root, right_buf, upper,
  1232. slot + 1, &right_buf);
  1233. if (ret) {
  1234. btrfs_block_release(root, right_buf);
  1235. return 1;
  1236. }
  1237. right = btrfs_buffer_leaf(right_buf);
  1238. free_space = btrfs_leaf_free_space(root, right);
  1239. if (free_space < data_size + sizeof(struct btrfs_item)) {
  1240. btrfs_block_release(root, right_buf);
  1241. return 1;
  1242. }
  1243. left_nritems = btrfs_header_nritems(&left->header);
  1244. if (left_nritems == 0) {
  1245. btrfs_block_release(root, right_buf);
  1246. return 1;
  1247. }
  1248. for (i = left_nritems - 1; i >= 1; i--) {
  1249. item = left->items + i;
  1250. if (path->slots[0] == i)
  1251. push_space += data_size + sizeof(*item);
  1252. if (btrfs_item_size(item) + sizeof(*item) + push_space >
  1253. free_space)
  1254. break;
  1255. push_items++;
  1256. push_space += btrfs_item_size(item) + sizeof(*item);
  1257. }
  1258. if (push_items == 0) {
  1259. btrfs_block_release(root, right_buf);
  1260. return 1;
  1261. }
  1262. if (push_items == left_nritems)
  1263. WARN_ON(1);
  1264. right_nritems = btrfs_header_nritems(&right->header);
  1265. /* push left to right */
  1266. push_space = btrfs_item_end(left->items + left_nritems - push_items);
  1267. push_space -= leaf_data_end(root, left);
  1268. /* make room in the right data area */
  1269. btrfs_memmove(root, right, btrfs_leaf_data(right) +
  1270. leaf_data_end(root, right) - push_space,
  1271. btrfs_leaf_data(right) +
  1272. leaf_data_end(root, right), BTRFS_LEAF_DATA_SIZE(root) -
  1273. leaf_data_end(root, right));
  1274. /* copy from the left data area */
  1275. btrfs_memcpy(root, right, btrfs_leaf_data(right) +
  1276. BTRFS_LEAF_DATA_SIZE(root) - push_space,
  1277. btrfs_leaf_data(left) + leaf_data_end(root, left),
  1278. push_space);
  1279. btrfs_memmove(root, right, right->items + push_items, right->items,
  1280. right_nritems * sizeof(struct btrfs_item));
  1281. /* copy the items from left to right */
  1282. btrfs_memcpy(root, right, right->items, left->items +
  1283. left_nritems - push_items,
  1284. push_items * sizeof(struct btrfs_item));
  1285. /* update the item pointers */
  1286. right_nritems += push_items;
  1287. btrfs_set_header_nritems(&right->header, right_nritems);
  1288. push_space = BTRFS_LEAF_DATA_SIZE(root);
  1289. for (i = 0; i < right_nritems; i++) {
  1290. btrfs_set_item_offset(right->items + i, push_space -
  1291. btrfs_item_size(right->items + i));
  1292. push_space = btrfs_item_offset(right->items + i);
  1293. }
  1294. left_nritems -= push_items;
  1295. btrfs_set_header_nritems(&left->header, left_nritems);
  1296. btrfs_mark_buffer_dirty(left_buf);
  1297. btrfs_mark_buffer_dirty(right_buf);
  1298. btrfs_memcpy(root, upper_node, &upper_node->ptrs[slot + 1].key,
  1299. &right->items[0].key, sizeof(struct btrfs_disk_key));
  1300. btrfs_mark_buffer_dirty(upper);
  1301. /* then fixup the leaf pointer in the path */
  1302. if (path->slots[0] >= left_nritems) {
  1303. path->slots[0] -= left_nritems;
  1304. btrfs_block_release(root, path->nodes[0]);
  1305. path->nodes[0] = right_buf;
  1306. path->slots[1] += 1;
  1307. } else {
  1308. btrfs_block_release(root, right_buf);
  1309. }
  1310. if (path->nodes[1])
  1311. check_node(root, path, 1);
  1312. return 0;
  1313. }
  1314. /*
  1315. * push some data in the path leaf to the left, trying to free up at
  1316. * least data_size bytes. returns zero if the push worked, nonzero otherwise
  1317. */
  1318. static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root
  1319. *root, struct btrfs_path *path, int data_size)
  1320. {
  1321. struct buffer_head *right_buf = path->nodes[0];
  1322. struct btrfs_leaf *right = btrfs_buffer_leaf(right_buf);
  1323. struct buffer_head *t;
  1324. struct btrfs_leaf *left;
  1325. int slot;
  1326. int i;
  1327. int free_space;
  1328. int push_space = 0;
  1329. int push_items = 0;
  1330. struct btrfs_item *item;
  1331. u32 old_left_nritems;
  1332. int ret = 0;
  1333. int wret;
  1334. slot = path->slots[1];
  1335. if (slot == 0) {
  1336. return 1;
  1337. }
  1338. if (!path->nodes[1]) {
  1339. return 1;
  1340. }
  1341. t = read_tree_block(root,
  1342. btrfs_node_blockptr(btrfs_buffer_node(path->nodes[1]), slot - 1));
  1343. left = btrfs_buffer_leaf(t);
  1344. free_space = btrfs_leaf_free_space(root, left);
  1345. if (free_space < data_size + sizeof(struct btrfs_item)) {
  1346. btrfs_block_release(root, t);
  1347. return 1;
  1348. }
  1349. /* cow and double check */
  1350. ret = btrfs_cow_block(trans, root, t, path->nodes[1], slot - 1, &t);
  1351. if (ret) {
  1352. /* we hit -ENOSPC, but it isn't fatal here */
  1353. btrfs_block_release(root, t);
  1354. return 1;
  1355. }
  1356. left = btrfs_buffer_leaf(t);
  1357. free_space = btrfs_leaf_free_space(root, left);
  1358. if (free_space < data_size + sizeof(struct btrfs_item)) {
  1359. btrfs_block_release(root, t);
  1360. return 1;
  1361. }
  1362. if (btrfs_header_nritems(&right->header) == 0) {
  1363. btrfs_block_release(root, t);
  1364. return 1;
  1365. }
  1366. for (i = 0; i < btrfs_header_nritems(&right->header) - 1; i++) {
  1367. item = right->items + i;
  1368. if (path->slots[0] == i)
  1369. push_space += data_size + sizeof(*item);
  1370. if (btrfs_item_size(item) + sizeof(*item) + push_space >
  1371. free_space)
  1372. break;
  1373. push_items++;
  1374. push_space += btrfs_item_size(item) + sizeof(*item);
  1375. }
  1376. if (push_items == 0) {
  1377. btrfs_block_release(root, t);
  1378. return 1;
  1379. }
  1380. if (push_items == btrfs_header_nritems(&right->header))
  1381. WARN_ON(1);
  1382. /* push data from right to left */
  1383. btrfs_memcpy(root, left, left->items +
  1384. btrfs_header_nritems(&left->header),
  1385. right->items, push_items * sizeof(struct btrfs_item));
  1386. push_space = BTRFS_LEAF_DATA_SIZE(root) -
  1387. btrfs_item_offset(right->items + push_items -1);
  1388. btrfs_memcpy(root, left, btrfs_leaf_data(left) +
  1389. leaf_data_end(root, left) - push_space,
  1390. btrfs_leaf_data(right) +
  1391. btrfs_item_offset(right->items + push_items - 1),
  1392. push_space);
  1393. old_left_nritems = btrfs_header_nritems(&left->header);
  1394. BUG_ON(old_left_nritems < 0);
  1395. for (i = old_left_nritems; i < old_left_nritems + push_items; i++) {
  1396. u32 ioff = btrfs_item_offset(left->items + i);
  1397. btrfs_set_item_offset(left->items + i, ioff -
  1398. (BTRFS_LEAF_DATA_SIZE(root) -
  1399. btrfs_item_offset(left->items +
  1400. old_left_nritems - 1)));
  1401. }
  1402. btrfs_set_header_nritems(&left->header, old_left_nritems + push_items);
  1403. /* fixup right node */
  1404. push_space = btrfs_item_offset(right->items + push_items - 1) -
  1405. leaf_data_end(root, right);
  1406. btrfs_memmove(root, right, btrfs_leaf_data(right) +
  1407. BTRFS_LEAF_DATA_SIZE(root) - push_space,
  1408. btrfs_leaf_data(right) +
  1409. leaf_data_end(root, right), push_space);
  1410. btrfs_memmove(root, right, right->items, right->items + push_items,
  1411. (btrfs_header_nritems(&right->header) - push_items) *
  1412. sizeof(struct btrfs_item));
  1413. btrfs_set_header_nritems(&right->header,
  1414. btrfs_header_nritems(&right->header) -
  1415. push_items);
  1416. push_space = BTRFS_LEAF_DATA_SIZE(root);
  1417. for (i = 0; i < btrfs_header_nritems(&right->header); i++) {
  1418. btrfs_set_item_offset(right->items + i, push_space -
  1419. btrfs_item_size(right->items + i));
  1420. push_space = btrfs_item_offset(right->items + i);
  1421. }
  1422. btrfs_mark_buffer_dirty(t);
  1423. btrfs_mark_buffer_dirty(right_buf);
  1424. wret = fixup_low_keys(trans, root, path, &right->items[0].key, 1);
  1425. if (wret)
  1426. ret = wret;
  1427. /* then fixup the leaf pointer in the path */
  1428. if (path->slots[0] < push_items) {
  1429. path->slots[0] += old_left_nritems;
  1430. btrfs_block_release(root, path->nodes[0]);
  1431. path->nodes[0] = t;
  1432. path->slots[1] -= 1;
  1433. } else {
  1434. btrfs_block_release(root, t);
  1435. path->slots[0] -= push_items;
  1436. }
  1437. BUG_ON(path->slots[0] < 0);
  1438. if (path->nodes[1])
  1439. check_node(root, path, 1);
  1440. return ret;
  1441. }
  1442. /*
  1443. * split the path's leaf in two, making sure there is at least data_size
  1444. * available for the resulting leaf level of the path.
  1445. *
  1446. * returns 0 if all went well and < 0 on failure.
  1447. */
  1448. static int split_leaf(struct btrfs_trans_handle *trans, struct btrfs_root
  1449. *root, struct btrfs_key *ins_key,
  1450. struct btrfs_path *path, int data_size)
  1451. {
  1452. struct buffer_head *l_buf;
  1453. struct btrfs_leaf *l;
  1454. u32 nritems;
  1455. int mid;
  1456. int slot;
  1457. struct btrfs_leaf *right;
  1458. struct buffer_head *right_buffer;
  1459. int space_needed = data_size + sizeof(struct btrfs_item);
  1460. int data_copy_size;
  1461. int rt_data_off;
  1462. int i;
  1463. int ret = 0;
  1464. int wret;
  1465. int double_split = 0;
  1466. struct btrfs_disk_key disk_key;
  1467. /* first try to make some room by pushing left and right */
  1468. wret = push_leaf_left(trans, root, path, data_size);
  1469. if (wret < 0)
  1470. return wret;
  1471. if (wret) {
  1472. wret = push_leaf_right(trans, root, path, data_size);
  1473. if (wret < 0)
  1474. return wret;
  1475. }
  1476. l_buf = path->nodes[0];
  1477. l = btrfs_buffer_leaf(l_buf);
  1478. /* did the pushes work? */
  1479. if (btrfs_leaf_free_space(root, l) >=
  1480. sizeof(struct btrfs_item) + data_size)
  1481. return 0;
  1482. if (!path->nodes[1]) {
  1483. ret = insert_new_root(trans, root, path, 1);
  1484. if (ret)
  1485. return ret;
  1486. }
  1487. slot = path->slots[0];
  1488. nritems = btrfs_header_nritems(&l->header);
  1489. mid = (nritems + 1)/ 2;
  1490. right_buffer = btrfs_alloc_free_block(trans, root, l_buf->b_blocknr, 0);
  1491. if (IS_ERR(right_buffer))
  1492. return PTR_ERR(right_buffer);
  1493. right = btrfs_buffer_leaf(right_buffer);
  1494. memset(&right->header, 0, sizeof(right->header));
  1495. btrfs_set_header_blocknr(&right->header, bh_blocknr(right_buffer));
  1496. btrfs_set_header_generation(&right->header, trans->transid);
  1497. btrfs_set_header_owner(&right->header, root->root_key.objectid);
  1498. btrfs_set_header_level(&right->header, 0);
  1499. memcpy(right->header.fsid, root->fs_info->disk_super->fsid,
  1500. sizeof(right->header.fsid));
  1501. if (mid <= slot) {
  1502. if (nritems == 1 ||
  1503. leaf_space_used(l, mid, nritems - mid) + space_needed >
  1504. BTRFS_LEAF_DATA_SIZE(root)) {
  1505. if (slot >= nritems) {
  1506. btrfs_cpu_key_to_disk(&disk_key, ins_key);
  1507. btrfs_set_header_nritems(&right->header, 0);
  1508. wret = insert_ptr(trans, root, path,
  1509. &disk_key,
  1510. bh_blocknr(right_buffer),
  1511. path->slots[1] + 1, 1);
  1512. if (wret)
  1513. ret = wret;
  1514. btrfs_block_release(root, path->nodes[0]);
  1515. path->nodes[0] = right_buffer;
  1516. path->slots[0] = 0;
  1517. path->slots[1] += 1;
  1518. return ret;
  1519. }
  1520. mid = slot;
  1521. double_split = 1;
  1522. }
  1523. } else {
  1524. if (leaf_space_used(l, 0, mid + 1) + space_needed >
  1525. BTRFS_LEAF_DATA_SIZE(root)) {
  1526. if (slot == 0) {
  1527. btrfs_cpu_key_to_disk(&disk_key, ins_key);
  1528. btrfs_set_header_nritems(&right->header, 0);
  1529. wret = insert_ptr(trans, root, path,
  1530. &disk_key,
  1531. bh_blocknr(right_buffer),
  1532. path->slots[1], 1);
  1533. if (wret)
  1534. ret = wret;
  1535. btrfs_block_release(root, path->nodes[0]);
  1536. path->nodes[0] = right_buffer;
  1537. path->slots[0] = 0;
  1538. if (path->slots[1] == 0) {
  1539. wret = fixup_low_keys(trans, root,
  1540. path, &disk_key, 1);
  1541. if (wret)
  1542. ret = wret;
  1543. }
  1544. return ret;
  1545. }
  1546. mid = slot;
  1547. double_split = 1;
  1548. }
  1549. }
  1550. btrfs_set_header_nritems(&right->header, nritems - mid);
  1551. data_copy_size = btrfs_item_end(l->items + mid) -
  1552. leaf_data_end(root, l);
  1553. btrfs_memcpy(root, right, right->items, l->items + mid,
  1554. (nritems - mid) * sizeof(struct btrfs_item));
  1555. btrfs_memcpy(root, right,
  1556. btrfs_leaf_data(right) + BTRFS_LEAF_DATA_SIZE(root) -
  1557. data_copy_size, btrfs_leaf_data(l) +
  1558. leaf_data_end(root, l), data_copy_size);
  1559. rt_data_off = BTRFS_LEAF_DATA_SIZE(root) -
  1560. btrfs_item_end(l->items + mid);
  1561. for (i = 0; i < btrfs_header_nritems(&right->header); i++) {
  1562. u32 ioff = btrfs_item_offset(right->items + i);
  1563. btrfs_set_item_offset(right->items + i, ioff + rt_data_off);
  1564. }
  1565. btrfs_set_header_nritems(&l->header, mid);
  1566. ret = 0;
  1567. wret = insert_ptr(trans, root, path, &right->items[0].key,
  1568. bh_blocknr(right_buffer), path->slots[1] + 1, 1);
  1569. if (wret)
  1570. ret = wret;
  1571. btrfs_mark_buffer_dirty(right_buffer);
  1572. btrfs_mark_buffer_dirty(l_buf);
  1573. BUG_ON(path->slots[0] != slot);
  1574. if (mid <= slot) {
  1575. btrfs_block_release(root, path->nodes[0]);
  1576. path->nodes[0] = right_buffer;
  1577. path->slots[0] -= mid;
  1578. path->slots[1] += 1;
  1579. } else
  1580. btrfs_block_release(root, right_buffer);
  1581. BUG_ON(path->slots[0] < 0);
  1582. check_node(root, path, 1);
  1583. if (!double_split)
  1584. return ret;
  1585. right_buffer = btrfs_alloc_free_block(trans, root, l_buf->b_blocknr, 0);
  1586. if (IS_ERR(right_buffer))
  1587. return PTR_ERR(right_buffer);
  1588. right = btrfs_buffer_leaf(right_buffer);
  1589. memset(&right->header, 0, sizeof(right->header));
  1590. btrfs_set_header_blocknr(&right->header, bh_blocknr(right_buffer));
  1591. btrfs_set_header_generation(&right->header, trans->transid);
  1592. btrfs_set_header_owner(&right->header, root->root_key.objectid);
  1593. btrfs_set_header_level(&right->header, 0);
  1594. memcpy(right->header.fsid, root->fs_info->disk_super->fsid,
  1595. sizeof(right->header.fsid));
  1596. btrfs_cpu_key_to_disk(&disk_key, ins_key);
  1597. btrfs_set_header_nritems(&right->header, 0);
  1598. wret = insert_ptr(trans, root, path,
  1599. &disk_key,
  1600. bh_blocknr(right_buffer),
  1601. path->slots[1], 1);
  1602. if (wret)
  1603. ret = wret;
  1604. if (path->slots[1] == 0) {
  1605. wret = fixup_low_keys(trans, root, path, &disk_key, 1);
  1606. if (wret)
  1607. ret = wret;
  1608. }
  1609. btrfs_block_release(root, path->nodes[0]);
  1610. path->nodes[0] = right_buffer;
  1611. path->slots[0] = 0;
  1612. check_node(root, path, 1);
  1613. check_leaf(root, path, 0);
  1614. return ret;
  1615. }
  1616. int btrfs_truncate_item(struct btrfs_trans_handle *trans,
  1617. struct btrfs_root *root,
  1618. struct btrfs_path *path,
  1619. u32 new_size)
  1620. {
  1621. int ret = 0;
  1622. int slot;
  1623. int slot_orig;
  1624. struct btrfs_leaf *leaf;
  1625. struct buffer_head *leaf_buf;
  1626. u32 nritems;
  1627. unsigned int data_end;
  1628. unsigned int old_data_start;
  1629. unsigned int old_size;
  1630. unsigned int size_diff;
  1631. int i;
  1632. slot_orig = path->slots[0];
  1633. leaf_buf = path->nodes[0];
  1634. leaf = btrfs_buffer_leaf(leaf_buf);
  1635. nritems = btrfs_header_nritems(&leaf->header);
  1636. data_end = leaf_data_end(root, leaf);
  1637. slot = path->slots[0];
  1638. old_data_start = btrfs_item_offset(leaf->items + slot);
  1639. old_size = btrfs_item_size(leaf->items + slot);
  1640. BUG_ON(old_size <= new_size);
  1641. size_diff = old_size - new_size;
  1642. BUG_ON(slot < 0);
  1643. BUG_ON(slot >= nritems);
  1644. /*
  1645. * item0..itemN ... dataN.offset..dataN.size .. data0.size
  1646. */
  1647. /* first correct the data pointers */
  1648. for (i = slot; i < nritems; i++) {
  1649. u32 ioff = btrfs_item_offset(leaf->items + i);
  1650. btrfs_set_item_offset(leaf->items + i,
  1651. ioff + size_diff);
  1652. }
  1653. /* shift the data */
  1654. btrfs_memmove(root, leaf, btrfs_leaf_data(leaf) +
  1655. data_end + size_diff, btrfs_leaf_data(leaf) +
  1656. data_end, old_data_start + new_size - data_end);
  1657. btrfs_set_item_size(leaf->items + slot, new_size);
  1658. btrfs_mark_buffer_dirty(leaf_buf);
  1659. ret = 0;
  1660. if (btrfs_leaf_free_space(root, leaf) < 0)
  1661. BUG();
  1662. check_leaf(root, path, 0);
  1663. return ret;
  1664. }
  1665. int btrfs_extend_item(struct btrfs_trans_handle *trans, struct btrfs_root
  1666. *root, struct btrfs_path *path, u32 data_size)
  1667. {
  1668. int ret = 0;
  1669. int slot;
  1670. int slot_orig;
  1671. struct btrfs_leaf *leaf;
  1672. struct buffer_head *leaf_buf;
  1673. u32 nritems;
  1674. unsigned int data_end;
  1675. unsigned int old_data;
  1676. unsigned int old_size;
  1677. int i;
  1678. slot_orig = path->slots[0];
  1679. leaf_buf = path->nodes[0];
  1680. leaf = btrfs_buffer_leaf(leaf_buf);
  1681. nritems = btrfs_header_nritems(&leaf->header);
  1682. data_end = leaf_data_end(root, leaf);
  1683. if (btrfs_leaf_free_space(root, leaf) < data_size)
  1684. BUG();
  1685. slot = path->slots[0];
  1686. old_data = btrfs_item_end(leaf->items + slot);
  1687. BUG_ON(slot < 0);
  1688. BUG_ON(slot >= nritems);
  1689. /*
  1690. * item0..itemN ... dataN.offset..dataN.size .. data0.size
  1691. */
  1692. /* first correct the data pointers */
  1693. for (i = slot; i < nritems; i++) {
  1694. u32 ioff = btrfs_item_offset(leaf->items + i);
  1695. btrfs_set_item_offset(leaf->items + i,
  1696. ioff - data_size);
  1697. }
  1698. /* shift the data */
  1699. btrfs_memmove(root, leaf, btrfs_leaf_data(leaf) +
  1700. data_end - data_size, btrfs_leaf_data(leaf) +
  1701. data_end, old_data - data_end);
  1702. data_end = old_data;
  1703. old_size = btrfs_item_size(leaf->items + slot);
  1704. btrfs_set_item_size(leaf->items + slot, old_size + data_size);
  1705. btrfs_mark_buffer_dirty(leaf_buf);
  1706. ret = 0;
  1707. if (btrfs_leaf_free_space(root, leaf) < 0)
  1708. BUG();
  1709. check_leaf(root, path, 0);
  1710. return ret;
  1711. }
  1712. /*
  1713. * Given a key and some data, insert an item into the tree.
  1714. * This does all the path init required, making room in the tree if needed.
  1715. */
  1716. int btrfs_insert_empty_item(struct btrfs_trans_handle *trans, struct btrfs_root
  1717. *root, struct btrfs_path *path, struct btrfs_key
  1718. *cpu_key, u32 data_size)
  1719. {
  1720. int ret = 0;
  1721. int slot;
  1722. int slot_orig;
  1723. struct btrfs_leaf *leaf;
  1724. struct buffer_head *leaf_buf;
  1725. u32 nritems;
  1726. unsigned int data_end;
  1727. struct btrfs_disk_key disk_key;
  1728. btrfs_cpu_key_to_disk(&disk_key, cpu_key);
  1729. /* create a root if there isn't one */
  1730. if (!root->node)
  1731. BUG();
  1732. ret = btrfs_search_slot(trans, root, cpu_key, path, data_size, 1);
  1733. if (ret == 0) {
  1734. return -EEXIST;
  1735. }
  1736. if (ret < 0)
  1737. goto out;
  1738. slot_orig = path->slots[0];
  1739. leaf_buf = path->nodes[0];
  1740. leaf = btrfs_buffer_leaf(leaf_buf);
  1741. nritems = btrfs_header_nritems(&leaf->header);
  1742. data_end = leaf_data_end(root, leaf);
  1743. if (btrfs_leaf_free_space(root, leaf) <
  1744. sizeof(struct btrfs_item) + data_size) {
  1745. BUG();
  1746. }
  1747. slot = path->slots[0];
  1748. BUG_ON(slot < 0);
  1749. if (slot != nritems) {
  1750. int i;
  1751. unsigned int old_data = btrfs_item_end(leaf->items + slot);
  1752. /*
  1753. * item0..itemN ... dataN.offset..dataN.size .. data0.size
  1754. */
  1755. /* first correct the data pointers */
  1756. for (i = slot; i < nritems; i++) {
  1757. u32 ioff = btrfs_item_offset(leaf->items + i);
  1758. btrfs_set_item_offset(leaf->items + i,
  1759. ioff - data_size);
  1760. }
  1761. /* shift the items */
  1762. btrfs_memmove(root, leaf, leaf->items + slot + 1,
  1763. leaf->items + slot,
  1764. (nritems - slot) * sizeof(struct btrfs_item));
  1765. /* shift the data */
  1766. btrfs_memmove(root, leaf, btrfs_leaf_data(leaf) +
  1767. data_end - data_size, btrfs_leaf_data(leaf) +
  1768. data_end, old_data - data_end);
  1769. data_end = old_data;
  1770. }
  1771. /* setup the item for the new data */
  1772. btrfs_memcpy(root, leaf, &leaf->items[slot].key, &disk_key,
  1773. sizeof(struct btrfs_disk_key));
  1774. btrfs_set_item_offset(leaf->items + slot, data_end - data_size);
  1775. btrfs_set_item_size(leaf->items + slot, data_size);
  1776. btrfs_set_header_nritems(&leaf->header, nritems + 1);
  1777. btrfs_mark_buffer_dirty(leaf_buf);
  1778. ret = 0;
  1779. if (slot == 0)
  1780. ret = fixup_low_keys(trans, root, path, &disk_key, 1);
  1781. if (btrfs_leaf_free_space(root, leaf) < 0)
  1782. BUG();
  1783. check_leaf(root, path, 0);
  1784. out:
  1785. return ret;
  1786. }
  1787. /*
  1788. * Given a key and some data, insert an item into the tree.
  1789. * This does all the path init required, making room in the tree if needed.
  1790. */
  1791. int btrfs_insert_item(struct btrfs_trans_handle *trans, struct btrfs_root
  1792. *root, struct btrfs_key *cpu_key, void *data, u32
  1793. data_size)
  1794. {
  1795. int ret = 0;
  1796. struct btrfs_path *path;
  1797. u8 *ptr;
  1798. path = btrfs_alloc_path();
  1799. BUG_ON(!path);
  1800. ret = btrfs_insert_empty_item(trans, root, path, cpu_key, data_size);
  1801. if (!ret) {
  1802. ptr = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]),
  1803. path->slots[0], u8);
  1804. btrfs_memcpy(root, path->nodes[0]->b_data,
  1805. ptr, data, data_size);
  1806. btrfs_mark_buffer_dirty(path->nodes[0]);
  1807. }
  1808. btrfs_free_path(path);
  1809. return ret;
  1810. }
  1811. /*
  1812. * delete the pointer from a given node.
  1813. *
  1814. * If the delete empties a node, the node is removed from the tree,
  1815. * continuing all the way the root if required. The root is converted into
  1816. * a leaf if all the nodes are emptied.
  1817. */
  1818. static int del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root,
  1819. struct btrfs_path *path, int level, int slot)
  1820. {
  1821. struct btrfs_node *node;
  1822. struct buffer_head *parent = path->nodes[level];
  1823. u32 nritems;
  1824. int ret = 0;
  1825. int wret;
  1826. node = btrfs_buffer_node(parent);
  1827. nritems = btrfs_header_nritems(&node->header);
  1828. if (slot != nritems -1) {
  1829. btrfs_memmove(root, node, node->ptrs + slot,
  1830. node->ptrs + slot + 1,
  1831. sizeof(struct btrfs_key_ptr) *
  1832. (nritems - slot - 1));
  1833. }
  1834. nritems--;
  1835. btrfs_set_header_nritems(&node->header, nritems);
  1836. if (nritems == 0 && parent == root->node) {
  1837. struct btrfs_header *header = btrfs_buffer_header(root->node);
  1838. BUG_ON(btrfs_header_level(header) != 1);
  1839. /* just turn the root into a leaf and break */
  1840. btrfs_set_header_level(header, 0);
  1841. } else if (slot == 0) {
  1842. wret = fixup_low_keys(trans, root, path, &node->ptrs[0].key,
  1843. level + 1);
  1844. if (wret)
  1845. ret = wret;
  1846. }
  1847. btrfs_mark_buffer_dirty(parent);
  1848. return ret;
  1849. }
  1850. /*
  1851. * delete the item at the leaf level in path. If that empties
  1852. * the leaf, remove it from the tree
  1853. */
  1854. int btrfs_del_item(struct btrfs_trans_handle *trans, struct btrfs_root *root,
  1855. struct btrfs_path *path)
  1856. {
  1857. int slot;
  1858. struct btrfs_leaf *leaf;
  1859. struct buffer_head *leaf_buf;
  1860. int doff;
  1861. int dsize;
  1862. int ret = 0;
  1863. int wret;
  1864. u32 nritems;
  1865. leaf_buf = path->nodes[0];
  1866. leaf = btrfs_buffer_leaf(leaf_buf);
  1867. slot = path->slots[0];
  1868. doff = btrfs_item_offset(leaf->items + slot);
  1869. dsize = btrfs_item_size(leaf->items + slot);
  1870. nritems = btrfs_header_nritems(&leaf->header);
  1871. if (slot != nritems - 1) {
  1872. int i;
  1873. int data_end = leaf_data_end(root, leaf);
  1874. btrfs_memmove(root, leaf, btrfs_leaf_data(leaf) +
  1875. data_end + dsize,
  1876. btrfs_leaf_data(leaf) + data_end,
  1877. doff - data_end);
  1878. for (i = slot + 1; i < nritems; i++) {
  1879. u32 ioff = btrfs_item_offset(leaf->items + i);
  1880. btrfs_set_item_offset(leaf->items + i, ioff + dsize);
  1881. }
  1882. btrfs_memmove(root, leaf, leaf->items + slot,
  1883. leaf->items + slot + 1,
  1884. sizeof(struct btrfs_item) *
  1885. (nritems - slot - 1));
  1886. }
  1887. btrfs_set_header_nritems(&leaf->header, nritems - 1);
  1888. nritems--;
  1889. /* delete the leaf if we've emptied it */
  1890. if (nritems == 0) {
  1891. if (leaf_buf == root->node) {
  1892. btrfs_set_header_level(&leaf->header, 0);
  1893. } else {
  1894. clean_tree_block(trans, root, leaf_buf);
  1895. wait_on_buffer(leaf_buf);
  1896. wret = del_ptr(trans, root, path, 1, path->slots[1]);
  1897. if (wret)
  1898. ret = wret;
  1899. wret = btrfs_free_extent(trans, root,
  1900. bh_blocknr(leaf_buf), 1, 1);
  1901. if (wret)
  1902. ret = wret;
  1903. }
  1904. } else {
  1905. int used = leaf_space_used(leaf, 0, nritems);
  1906. if (slot == 0) {
  1907. wret = fixup_low_keys(trans, root, path,
  1908. &leaf->items[0].key, 1);
  1909. if (wret)
  1910. ret = wret;
  1911. }
  1912. /* delete the leaf if it is mostly empty */
  1913. if (used < BTRFS_LEAF_DATA_SIZE(root) / 3) {
  1914. /* push_leaf_left fixes the path.
  1915. * make sure the path still points to our leaf
  1916. * for possible call to del_ptr below
  1917. */
  1918. slot = path->slots[1];
  1919. get_bh(leaf_buf);
  1920. wret = push_leaf_left(trans, root, path, 1);
  1921. if (wret < 0 && wret != -ENOSPC)
  1922. ret = wret;
  1923. if (path->nodes[0] == leaf_buf &&
  1924. btrfs_header_nritems(&leaf->header)) {
  1925. wret = push_leaf_right(trans, root, path, 1);
  1926. if (wret < 0 && wret != -ENOSPC)
  1927. ret = wret;
  1928. }
  1929. if (btrfs_header_nritems(&leaf->header) == 0) {
  1930. u64 blocknr = bh_blocknr(leaf_buf);
  1931. clean_tree_block(trans, root, leaf_buf);
  1932. wait_on_buffer(leaf_buf);
  1933. wret = del_ptr(trans, root, path, 1, slot);
  1934. if (wret)
  1935. ret = wret;
  1936. btrfs_block_release(root, leaf_buf);
  1937. wret = btrfs_free_extent(trans, root, blocknr,
  1938. 1, 1);
  1939. if (wret)
  1940. ret = wret;
  1941. } else {
  1942. btrfs_mark_buffer_dirty(leaf_buf);
  1943. btrfs_block_release(root, leaf_buf);
  1944. }
  1945. } else {
  1946. btrfs_mark_buffer_dirty(leaf_buf);
  1947. }
  1948. }
  1949. return ret;
  1950. }
  1951. /*
  1952. * walk up the tree as far as required to find the next leaf.
  1953. * returns 0 if it found something or 1 if there are no greater leaves.
  1954. * returns < 0 on io errors.
  1955. */
  1956. int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path)
  1957. {
  1958. int slot;
  1959. int level = 1;
  1960. u64 blocknr;
  1961. struct buffer_head *c;
  1962. struct btrfs_node *c_node;
  1963. struct buffer_head *next = NULL;
  1964. while(level < BTRFS_MAX_LEVEL) {
  1965. if (!path->nodes[level])
  1966. return 1;
  1967. slot = path->slots[level] + 1;
  1968. c = path->nodes[level];
  1969. c_node = btrfs_buffer_node(c);
  1970. if (slot >= btrfs_header_nritems(&c_node->header)) {
  1971. level++;
  1972. continue;
  1973. }
  1974. blocknr = btrfs_node_blockptr(c_node, slot);
  1975. if (next)
  1976. btrfs_block_release(root, next);
  1977. if (path->reada)
  1978. reada_for_search(root, path, level, slot);
  1979. next = read_tree_block(root, blocknr);
  1980. break;
  1981. }
  1982. path->slots[level] = slot;
  1983. while(1) {
  1984. level--;
  1985. c = path->nodes[level];
  1986. btrfs_block_release(root, c);
  1987. path->nodes[level] = next;
  1988. path->slots[level] = 0;
  1989. if (!level)
  1990. break;
  1991. if (path->reada)
  1992. reada_for_search(root, path, level, 0);
  1993. next = read_tree_block(root,
  1994. btrfs_node_blockptr(btrfs_buffer_node(next), 0));
  1995. }
  1996. return 0;
  1997. }